import pandas as pd

# Load the monthly total-revenue sheet and index it by period.
df = pd.read_excel(r'C:\Users\jupe\Desktop\Praktik\210\210 - Forecast input.xlsx',
                   sheet_name='Elcon total omsætning')
df.Periode = pd.to_datetime(df.Periode)
df2 = df                       # keep a reference that still has 'Periode' as a column
df = df.set_index('Periode')

from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition: observed = trend + seasonal + residual.
results = seasonal_decompose(df, model='additive')
trend_estimate = results.trend
periodic_estimate = results.seasonal
residual = results.resid
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

# Plot the raw revenue series with quarterly major / monthly minor ticks.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df)
ax.set_title("Udvikling i omsætning")
ax.set_xlabel("År og måned")
ax.set_ylabel("Omsætning(100 mdkk)")
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))   # quarterly major ticks
ax.xaxis.set_minor_locator(mdates.MonthLocator())             # monthly minor ticks
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.format_xdata = mdates.DateFormatter('%Y-%m')
# Clamp the x-axis to whole years around the observed range.
datemin = np.datetime64(df.index[0], 'Y')
datemax = np.datetime64(df.index[-1], 'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Plot the estimated trend component of the decomposition.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(trend_estimate)
ax.set_title("Trend")
ax.set_xlabel("År og måned")
ax.set_ylabel("Omsætning(10 mdkk)")
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))   # quarterly major ticks
ax.xaxis.set_minor_locator(mdates.MonthLocator())             # monthly minor ticks
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.format_xdata = mdates.DateFormatter('%Y-%m')
# Clamp the x-axis to whole years around the observed range.
datemin = np.datetime64(trend_estimate.index[0], 'Y')
datemax = np.datetime64(trend_estimate.index[-1], 'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Plot the repeating seasonal component of the decomposition.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(periodic_estimate)
ax.set_title("Sæson effekt")
ax.set_xlabel("År og måned")
ax.set_ylabel("Omsætning(10 mdkk)")
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))   # quarterly major ticks
ax.xaxis.set_minor_locator(mdates.MonthLocator())             # monthly minor ticks
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.format_xdata = mdates.DateFormatter('%Y-%m')
# Clamp the x-axis to whole years around the observed range.
datemin = np.datetime64(periodic_estimate.index[0], 'Y')
datemax = np.datetime64(periodic_estimate.index[-1], 'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Plot the residual (what trend + seasonal does not explain).
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(residual)
ax.set_title("Afvigelser")
ax.set_xlabel("År og måned")
ax.set_ylabel("Omsætning(10 mdkk)")
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))   # quarterly major ticks
ax.xaxis.set_minor_locator(mdates.MonthLocator())             # monthly minor ticks
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.format_xdata = mdates.DateFormatter('%Y-%m')
# Clamp the x-axis to whole years around the observed range.
datemin = np.datetime64(residual.index[0], 'Y')
datemax = np.datetime64(residual.index[-1], 'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Load the SARIMA input sheet and build train / test / forecast windows.
df = pd.read_excel(r'C:\Users\jupe\Desktop\Praktik\210\210 - Forecast input.xlsx',
                   sheet_name='SARIMA input',
                   skiprows=1)
df.Periode = pd.to_datetime(df.Periode)  # (review) original ran this conversion twice; once is enough

import warnings
warnings.filterwarnings("ignore")  # NOTE(review): suppresses ALL warnings, incl. pandas deprecations

# NOTE(review): `df.train = ...` stores the subset as an *attribute* on the
# DataFrame rather than a separate variable (pandas warns about this pattern);
# kept as-is because the rest of the script reads df.train / df.test / df.forecast.
# Train: everything before 2021; test: Jan-Nov 2021; forecast window: 2021 onwards.
df.train = df.loc[df['Periode'].dt.year < 2021]
df.test = df.loc[(df['Periode'].dt.year == 2021) & (df['Periode'].dt.month <= 11)]
df.train = df.train.set_index('Periode')
df.test = df.test.set_index('Periode')
df.forecast = df.loc[df['Periode'].dt.year >= 2021]
df.forecast = df.forecast.set_index('Periode')

import pmdarima as pm
from sklearn.metrics import mean_squared_error

# Forecast horizon: 36 monthly periods starting the month after the last training point.
n_periods = 36
n_periods_index = n_periods + 1
# closed='right' drops the anchor date itself, leaving exactly n_periods future months.
# NOTE(review): `closed=` is deprecated since pandas 1.4 and removed in 2.0 —
# switch to inclusive='right' when pandas is upgraded.
index_of_fc = pd.date_range(df.train.index[-1], periods=n_periods_index, freq='MS', closed='right')
# Fit one seasonal ARIMA per department column; collect the fitted model,
# its hold-out RMSE (Jan-Nov 2021) and the 36-month forecast per column.
modeldescription = pd.DataFrame()
forecast = pd.DataFrame()
rmse = pd.DataFrame()
for i in range(len(df.train.columns)):
    # One department series at a time, with missing months dropped.
    # (review) original stashed this as df.train.afdeling — a DataFrame
    # attribute — for no benefit; a plain local is clearer.
    series = df.train.iloc[:, [i]].dropna()
    smodel = pm.auto_arima(series, start_p=1, start_q=1,
                           test='adf',
                           max_p=3, max_q=3, m=12,
                           start_P=0, seasonal=True,
                           d=None, D=1, trace=True,
                           error_action='ignore',
                           suppress_warnings=True,
                           stepwise=True)
    # Forecast
    fitted, confint = smodel.predict(n_periods=n_periods,
                                     return_conf_int=True)
    # Save forecast and model performance measurements
    col = df.train.columns[i]
    modeldescription[col] = pd.Series(smodel)
    Y_true = df.test.iloc[:, [i]]
    # (review) generalized from the hard-coded fitted[0:11] so the comparison
    # always matches the test window length; also removed a dead loop that
    # recomputed per-step errors and discarded them every iteration.
    Y_pred = fitted[:len(Y_true)]
    rmse[col] = pd.Series(mean_squared_error(Y_true, Y_pred, squared=False), index=['RMSE'])
    forecast[col] = pd.Series(fitted, index=index_of_fc)
# --- auto_arima stepwise-search console output (trace=True), pasted from the
# --- notebook. Condensed here to the selected models only; rerun the loop
# --- above with trace=True to reproduce the full search trace.
# Best model per series, in loop order:
#  1. ARIMA(1,2,3)(0,1,1)[12]
#  2. ARIMA(1,2,1)(0,1,1)[12]
#  3. ARIMA(0,1,1)(0,1,1)[12]
#  4. ARIMA(0,1,1)(0,1,2)[12]
#  5. ARIMA(0,2,2)(0,1,1)[12]
#  6. ARIMA(2,1,3)(0,1,1)[12]
#  7. ARIMA(0,1,1)(0,1,1)[12]
#  8. ARIMA(1,2,1)(0,1,1)[12]
#  9. ARIMA(0,1,1)(0,1,1)[12]
# 10. ARIMA(1,1,0)(0,1,1)[12]
# 11. ARIMA(0,1,2)(1,1,1)[12]
# 12. ARIMA(0,1,1)(0,1,1)[12]
# 13. ARIMA(0,1,1)(0,1,1)[12]
# 14. ARIMA(0,0,0)(0,1,0)[12] intercept
# 15. ARIMA(0,1,2)(0,1,1)[12]
# 16. ARIMA(0,1,1)(0,1,1)[12]
# 17. ARIMA(0,1,1)(0,1,1)[12] intercept
# 18. ARIMA(0,1,0)(1,1,0)[12]
# 19. ARIMA(0,1,0)(0,1,1)[12]
# 20. ARIMA(0,0,0)(0,1,2)[12]
# 21. ARIMA(3,1,0)(0,1,1)[12]
# 22. ARIMA(0,1,0)(2,1,0)[12]
# Stack model description + RMSE + forecast rows into one result table.
pd.options.display.float_format = '{:.0f}'.format
modeldescription.index = ['ModelDescription']
rmse.index = ['RMSE']
# (review) pd.concat replaces DataFrame.append, which was deprecated in
# pandas 1.4 and removed in 2.0; row order is identical.
# NOTE(review): `forecast.result` is an attribute set on the forecast
# DataFrame — kept because the lines below read it back.
forecast.result = pd.concat([modeldescription, rmse, forecast])
forecast_data = forecast.result[1:]    # RMSE row + forecast values
forecast_info = forecast.result[0:1]   # ModelDescription row
forecast_data = forecast_data.astype('int32')
# Load the KPI sheet and split it chronologically: 48 months train,
# 8 months validation, remainder test.
df = pd.read_excel(r'C:\Users\jupe\Desktop\Praktik\210\210 - Forecast input.xlsx',
                   sheet_name='210 - KPI')
# (review) removed a duplicate `Periode = pd.to_datetime(df.Periode)` that
# only created an unused module-level variable.
df.Periode = pd.to_datetime(df.Periode)
df = df.set_index('Periode')
df_train = df.iloc[:48]
df_val = df.iloc[48:56]
df_test = df.iloc[56:]
df_train_omsætning = df_train['Omsætning (t)']

from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the raw training series; result[1] is the p-value.
result = adfuller(df_train_omsætning)
result[1]
# 0.4925797664100006  <- notebook output: p > 0.05, series is non-stationary
# Visualise the raw monthly revenue training series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_omsætning)
ax.set_xlabel("År og måned")
ax.set_ylabel("Omsætning (mdkk)")
ax.set_title("Udvikling i omsætning")
# Quarterly major ticks, monthly minor ticks, labelled as YYYY-MM.
fmt_quarter_year = mdates.MonthLocator(interval=3)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_quarter_year)
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
# Round the x-limits outward to whole years.
datemin = np.datetime64(df_train_omsætning.index[0], 'Y')
datemax = np.datetime64(df_train_omsætning.index[-1], 'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Estimate the seasonal component of the revenue training series.
# FIX: the model argument was misspelled 'addidative'. statsmodels only checks
# for a leading 'm' (multiplicative) and silently falls back to additive, so
# behaviour is unchanged — but spell it correctly to make the intent explicit.
results = seasonal_decompose(df_train_omsætning, model='additive')
periodic_estimate_omsætning = results.seasonal
# Plot the estimated seasonal component.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(periodic_estimate_omsætning)
ax.set(xlabel="År og måned",
ylabel="Omsætning",
title='Sæsoneffekt')
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_omsætning.index[0], 'Y')
datemax = np.datetime64(df_train_omsætning.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Remove the seasonal component from the training series.
df_train_omsætning = df_train_omsætning-periodic_estimate_omsætning
# ADF after deseasonalising: recorded p ~ 0.42 -> still non-stationary.
result = adfuller(df_train_omsætning)
result[1]
0.4186683266739646
# Plot the deseasonalised revenue series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_omsætning)
ax.set(xlabel="År og måned",
ylabel="Omsætning (mdkk)",
title="Udvikling i omsætning")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_omsætning.index[0], 'Y')
datemax = np.datetime64(df_train_omsætning.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# First difference to remove the trend; diff() leaves one leading NaN which
# dropna() removes. (The original's second dropna() was a redundant no-op and
# has been removed.)
df_train_omsætning = df_train_omsætning.diff(periods = 1).dropna(axis = 0)
# ADF after differencing: recorded p ~ 1.2e-11 -> stationary.
result = adfuller(df_train_omsætning)
result[1]
1.2348533463838644e-11
# Min-max scale using TRAINING extremes only; the same min/max are reused on
# the validation/test splits below to avoid data leakage.
omsætning_train_min = min(df_train_omsætning)
omsætning_train_max = max(df_train_omsætning)
df_train_omsætning = (df_train_omsætning-omsætning_train_min)/(omsætning_train_max-omsætning_train_min)
df_train_omsætning = pd.DataFrame(df_train_omsætning, columns = ['Omsætning (t)'])
# Plot the fully transformed (deseasonalised, differenced, scaled) revenue series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_omsætning)
ax.set(xlabel="År og måned",
ylabel="Omsætning",
title="Udvikling i omsætning")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_omsætning.index[0], 'Y')
datemax = np.datetime64(df_train_omsætning.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Validation split: apply the same transforms as training. Row 47 (last train
# month) is included so diff() has a predecessor for the first val month.
df_val_omsætning = df['Omsætning (t)']
df_val_omsætning = df_val_omsætning.iloc[47:56]
# Positional slice of the seasonal estimate — presumably the seasonal values
# whose calendar months line up with rows 47..55; TODO confirm alignment.
val_periodisering = periodic_estimate_omsætning[11:20]
df_val_omsætning = df_val_omsætning - val_periodisering.values
df_val_omsætning = df_val_omsætning.diff(periods = 1)
df_val_omsætning = df_val_omsætning.dropna(axis=0)
# Scale with the TRAINING min/max (no leakage).
df_val_omsætning = (df_val_omsætning-omsætning_train_min)/(omsætning_train_max-omsætning_train_min)
df_val_omsætning = pd.DataFrame(df_val_omsætning, columns = ['Omsætning (t)'])
# Test split, same pipeline; row 55 included as the diff() predecessor.
df_test_omsætning = df['Omsætning (t)']
df_test_omsætning = df_test_omsætning.iloc[55:]
test_periodisering = periodic_estimate_omsætning[8:12]
df_test_omsætning = df_test_omsætning - test_periodisering.values
df_test_omsætning = df_test_omsætning.diff(periods = 1)
df_test_omsætning = df_test_omsætning.dropna(axis=0)
df_test_omsætning = (df_test_omsætning-omsætning_train_min)/(omsætning_train_max-omsætning_train_min)
df_test_omsætning = pd.DataFrame(df_test_omsætning, columns = ['Omsætning (t)'])
# Cost-of-goods ('Vareforbrug') training series.
df_train_vareforbrug = df_train['Vareforbrug (t)']
# Plot the raw series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_vareforbrug)
ax.set(xlabel="År og måned",
ylabel="Vareforbrug (mdkk)",
title="Udvikling i vareforbrug")
# Quarterly major / monthly minor ticks, labelled YYYY-MM.
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
# Snap the x-range outward to whole years.
datemin = np.datetime64(df_train_vareforbrug.index[0], 'Y')
datemax = np.datetime64(df_train_vareforbrug.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Estimate the seasonal component of the cost-of-goods series.
# FIX: 'addidative' typo — statsmodels silently treated it as additive
# (it only checks for a leading 'm'); spelled correctly for clarity.
results = seasonal_decompose(df_train_vareforbrug, model='additive')
periodic_estimate_vareforbrug = results.seasonal
# Plot the estimated seasonal component.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(periodic_estimate_vareforbrug)
ax.set(xlabel="År og måned",
ylabel="Vareforbrug",
title="Sæson effekt")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_vareforbrug.index[0], 'Y')
datemax = np.datetime64(df_train_vareforbrug.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Remove the seasonal component.
df_train_vareforbrug = df_train_vareforbrug-periodic_estimate_vareforbrug
# ADF after deseasonalising: recorded p ~ 0.02 (below 0.05 already).
result = adfuller(df_train_vareforbrug)
result[1]
0.01974672849389804
# Plot the deseasonalised cost-of-goods series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_vareforbrug)
ax.set(xlabel="År og måned",
ylabel="Vareforbrug (mdkk)",
title="Udvikling i vareforbrug")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_vareforbrug.index[0], 'Y')
datemax = np.datetime64(df_train_vareforbrug.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# First difference; dropna() removes the leading NaN. (The original's second
# dropna() was a redundant no-op and has been removed.)
df_train_vareforbrug = df_train_vareforbrug.diff(periods = 1).dropna(axis = 0)
# ADF after differencing: recorded p ~ 1.5e-10 -> stationary.
result = adfuller(df_train_vareforbrug)
result[1]
1.5101574505942425e-10
# Min-max scale with TRAINING extremes (reused for val/test; no leakage).
vareforbrug_train_min = min(df_train_vareforbrug)
vareforbrug_train_max = max(df_train_vareforbrug)
df_train_vareforbrug = (df_train_vareforbrug-vareforbrug_train_min)/(vareforbrug_train_max-vareforbrug_train_min)
df_train_vareforbrug = pd.DataFrame(df_train_vareforbrug, columns = ['Vareforbrug (t)'])
# Plot the fully transformed cost-of-goods training series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_vareforbrug)
ax.set(xlabel="År og måned",
ylabel="Vareforbrug",
title="Udvikling i vareforbrug")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_vareforbrug.index[0], 'Y')
datemax = np.datetime64(df_train_vareforbrug.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Validation split: same transforms as training; row 47 included so diff()
# has a predecessor for the first val month.
df_val_vareforbrug = df['Vareforbrug (t)']
df_val_vareforbrug = df_val_vareforbrug.iloc[47:56]
# Seasonal slice presumably aligned to rows 47..55 — TODO confirm alignment.
val_periodisering = periodic_estimate_vareforbrug[11:20]
df_val_vareforbrug = df_val_vareforbrug - val_periodisering.values
df_val_vareforbrug = df_val_vareforbrug.diff(periods = 1)
df_val_vareforbrug = df_val_vareforbrug.dropna(axis=0)
# Scale with the TRAINING min/max.
df_val_vareforbrug = (df_val_vareforbrug-vareforbrug_train_min)/(vareforbrug_train_max-vareforbrug_train_min)
df_val_vareforbrug = pd.DataFrame(df_val_vareforbrug, columns = ['Vareforbrug (t)'])
# Test split, same pipeline; row 55 included as the diff() predecessor.
df_test_vareforbrug = df['Vareforbrug (t)']
df_test_vareforbrug = df_test_vareforbrug.iloc[55:]
test_periodisering = periodic_estimate_vareforbrug[8:12]
df_test_vareforbrug = df_test_vareforbrug - test_periodisering.values
df_test_vareforbrug = df_test_vareforbrug.diff(periods = 1)
df_test_vareforbrug = df_test_vareforbrug.dropna(axis=0)
df_test_vareforbrug = (df_test_vareforbrug-vareforbrug_train_min)/(vareforbrug_train_max-vareforbrug_train_min)
df_test_vareforbrug = pd.DataFrame(df_test_vareforbrug, columns = ['Vareforbrug (t)'])
# Wage-spend ('Lønforbrug') training series: deseasonalise, difference and
# scale, mirroring the treatment of the other KPI series.
df_train_lønforbrug = df_train['Lønforbrug (t)']
# ADF on the raw series: recorded p ~ 0.97 -> clearly non-stationary.
result = adfuller(df_train_lønforbrug)
result[1]
0.97262213302789
# Plot the raw series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_lønforbrug)
ax.set(xlabel="År og måned",
ylabel="Lønforbrug (mdkk)",
title="Udvikling i lønforbrug")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_lønforbrug.index[0], 'Y')
datemax = np.datetime64(df_train_lønforbrug.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Estimate the seasonal component. FIX: 'addidative' typo (statsmodels fell
# back to additive anyway; spelled out for clarity).
results = seasonal_decompose(df_train_lønforbrug, model='additive')
periodic_estimate_lønforbrug = results.seasonal
# Plot the seasonal component.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(periodic_estimate_lønforbrug)
ax.set(xlabel="År og måned",
ylabel="Lønforbrug",
title="Sæson effekt")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_lønforbrug.index[0], 'Y')
datemax = np.datetime64(df_train_lønforbrug.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# BUG FIX: the original subtracted the seasonal component TWICE (once before
# and once after the seasonal plot), unlike every other series and unlike this
# series' own validation/test preparation below, which removes it only once.
# Subtract it exactly once here.
df_train_lønforbrug = df_train_lønforbrug-periodic_estimate_lønforbrug
result = adfuller(df_train_lønforbrug)
result[1]
# (Recorded p-values from the original double-subtracted run were 0.61 / 0.94;
# this run's value will differ.)
# Plot the deseasonalised series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_lønforbrug)
ax.set(xlabel="År og måned",
ylabel="Lønforbrug (mdkk)",
title="Udvikling i lønforbrug")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_lønforbrug.index[0], 'Y')
datemax = np.datetime64(df_train_lønforbrug.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# First difference; dropna() removes the leading NaN. (The original's second
# dropna() was a redundant no-op and has been removed.)
df_train_lønforbrug = df_train_lønforbrug.diff(periods = 1).dropna(axis = 0)
# ADF after differencing (recorded p ~ 9.5e-16 in the original run).
result = adfuller(df_train_lønforbrug)
result[1]
# Min-max scale with TRAINING extremes (reused for val/test; no leakage).
lønforbrug_train_min = min(df_train_lønforbrug)
lønforbrug_train_max = max(df_train_lønforbrug)
df_train_lønforbrug = (df_train_lønforbrug-lønforbrug_train_min)/(lønforbrug_train_max-lønforbrug_train_min)
df_train_lønforbrug = pd.DataFrame(df_train_lønforbrug, columns = ['Lønforbrug (t)'])
# Plot the fully transformed wage-spend training series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_lønforbrug)
ax.set(xlabel="År og måned",
ylabel="Lønforbrug",
title="Udvikling i lønforbrug")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_lønforbrug.index[0], 'Y')
datemax = np.datetime64(df_train_lønforbrug.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Validation split: seasonal removed ONCE, differenced, scaled with the
# TRAINING min/max. Row 47 included so diff() has a predecessor.
df_val_lønforbrug = df['Lønforbrug (t)']
df_val_lønforbrug = df_val_lønforbrug.iloc[47:56]
# Seasonal slice presumably aligned to rows 47..55 — TODO confirm alignment.
val_periodisering = periodic_estimate_lønforbrug[11:20]
df_val_lønforbrug = df_val_lønforbrug - val_periodisering.values
df_val_lønforbrug = df_val_lønforbrug.diff(periods = 1)
df_val_lønforbrug = df_val_lønforbrug.dropna(axis=0)
df_val_lønforbrug = (df_val_lønforbrug-lønforbrug_train_min)/(lønforbrug_train_max-lønforbrug_train_min)
df_val_lønforbrug = pd.DataFrame(df_val_lønforbrug, columns = ['Lønforbrug (t)'])
# Test split, same pipeline; row 55 included as the diff() predecessor.
df_test_lønforbrug = df['Lønforbrug (t)']
df_test_lønforbrug = df_test_lønforbrug.iloc[55:]
test_periodisering = periodic_estimate_lønforbrug[8:12]
df_test_lønforbrug = df_test_lønforbrug - test_periodisering.values
df_test_lønforbrug = df_test_lønforbrug.diff(periods = 1)
df_test_lønforbrug = df_test_lønforbrug.dropna(axis=0)
df_test_lønforbrug = (df_test_lønforbrug-lønforbrug_train_min)/(lønforbrug_train_max-lønforbrug_train_min)
df_test_lønforbrug = pd.DataFrame(df_test_lønforbrug, columns = ['Lønforbrug (t)'])
# Billing-rate ('Faktureringsgrad') training series.
df_train_faktureringsgrad = df_train['Faktureringsgrad (t)']
# ADF: recorded p ~ 0.0002 -> already stationary, so only scaling is applied
# (no deseasonalising or differencing for this series).
result = adfuller(df_train_faktureringsgrad)
result[1]
0.0002342165980579539
# Min-max scale with TRAINING extremes (reused for val/test below).
faktureringsgrad_train_min = min(df_train_faktureringsgrad)
faktureringsgrad_train_max = max(df_train_faktureringsgrad)
df_train_faktureringsgrad = (df_train_faktureringsgrad-faktureringsgrad_train_min)/(faktureringsgrad_train_max-faktureringsgrad_train_min)
df_train_faktureringsgrad = pd.DataFrame(df_train_faktureringsgrad, columns = ['Faktureringsgrad (t)'])
# Plot the scaled series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_faktureringsgrad)
ax.set(xlabel="År og måned",
ylabel="Faktureringsgrad",
title="Udvikling i faktureringsgrad")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_faktureringsgrad.index[0], 'Y')
datemax = np.datetime64(df_train_faktureringsgrad.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Validation/test splits: scale only, with the TRAINING min/max.
df_val_faktureringsgrad = df_val['Faktureringsgrad (t)']
df_val_faktureringsgrad = (df_val_faktureringsgrad-faktureringsgrad_train_min)/(faktureringsgrad_train_max-faktureringsgrad_train_min)
df_val_faktureringsgrad = pd.DataFrame(df_val_faktureringsgrad, columns = ['Faktureringsgrad (t)'])
df_test_faktureringsgrad = df_test['Faktureringsgrad (t)']
df_test_faktureringsgrad = (df_test_faktureringsgrad-faktureringsgrad_train_min)/(faktureringsgrad_train_max-faktureringsgrad_train_min)
df_test_faktureringsgrad = pd.DataFrame(df_test_faktureringsgrad, columns = ['Faktureringsgrad (t)'])
# Production-headcount ('Antal produktionsmedarbejdere') training series.
df_train_antalp = df_train['Antal produktionsmedarbejdere (t)']
# ADF on the raw series: recorded p ~ 0.28 -> non-stationary.
result = adfuller(df_train_antalp)
result[1]
0.275744409221283
# Plot the raw series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_antalp)
ax.set(xlabel="År og måned",
ylabel="Antal produktionsmedarbejdere",
title="Udvikling i antal produktionsmedarbejdere")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_antalp.index[0], 'Y')
datemax = np.datetime64(df_train_antalp.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Log-transform (variance stabilisation); this series alone gets a log step.
df_train_antalp = np.log(df_train_antalp)
# ADF after log: recorded p ~ 0.17 -> still non-stationary.
result = adfuller(df_train_antalp)
result[1]
0.17001858895926958
# Plot the log-transformed series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_antalp)
ax.set(xlabel="År og måned",
ylabel="Antal produktionsmedarbejdere",
title="Udvikling i antal produktionsmedarbejdere")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_antalp.index[0], 'Y')
datemax = np.datetime64(df_train_antalp.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Estimate the seasonal component of the (log) headcount series.
# FIX: 'addidative' typo — statsmodels silently treated it as additive
# (it only checks for a leading 'm'); spelled correctly.
results = seasonal_decompose(df_train_antalp, model='additive')
periodic_estimate_antalp = results.seasonal
# Plot the estimated seasonal component.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(periodic_estimate_antalp)
ax.set(xlabel="År og måned",
ylabel="Antal produktionsmedarbejdere",
title="Sæson effekt")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_antalp.index[0], 'Y')
datemax = np.datetime64(df_train_antalp.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Remove the seasonal component.
df_train_antalp = df_train_antalp-periodic_estimate_antalp
# ADF after deseasonalising: recorded p ~ 0.17 -> still non-stationary.
result = adfuller(df_train_antalp)
result[1]
0.17407814170812058
# Plot the deseasonalised (log) headcount series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_antalp)
ax.set(xlabel="År og måned",
ylabel="Antal produktionsmedarbejdere",
title="Udvikling i antal produktionsmedarbejdere")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_antalp.index[0], 'Y')
datemax = np.datetime64(df_train_antalp.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# First difference; dropna() removes the leading NaN. (The original's second
# dropna() was a redundant no-op and has been removed.)
df_train_antalp = df_train_antalp.diff(periods = 1).dropna(axis = 0)
# ADF after differencing: recorded p ~ 0.00019 -> stationary.
result = adfuller(df_train_antalp)
result[1]
0.00018877780640718744
# Min-max scale with TRAINING extremes (reused for val/test; no leakage).
antalp_train_min = min(df_train_antalp)
antalp_train_max = max(df_train_antalp)
df_train_antalp = (df_train_antalp-antalp_train_min)/(antalp_train_max-antalp_train_min)
df_train_antalp = pd.DataFrame(df_train_antalp, columns = ['Antal produktionsmedarbejdere (t)'])
# Plot the fully transformed series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_antalp)
ax.set(xlabel="År og måned",
ylabel="Antal produktionsmedarbejdere",
title="Udvikling i antal produktionsmedarbejdere")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_antalp.index[0], 'Y')
datemax = np.datetime64(df_train_antalp.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Validation split for headcount: same transforms as training (log,
# deseasonalise, difference, train-based min-max scaling). Row 47 is included
# so diff() has a predecessor for the first val month.
df_val_antalp = df['Antal produktionsmedarbejdere (t)']
df_val_antalp = df_val_antalp.iloc[47:56]
df_val_antalp = np.log(df_val_antalp)
val_periodisering = periodic_estimate_antalp[11:20]
df_val_antalp = df_val_antalp - val_periodisering.values
df_val_antalp = df_val_antalp.diff(periods = 1)
df_val_antalp = df_val_antalp.dropna(axis=0)
df_val_antalp = (df_val_antalp-antalp_train_min)/(antalp_train_max-antalp_train_min)
df_val_antalp = pd.DataFrame(df_val_antalp, columns = ['Antal produktionsmedarbejdere (t)'])
# Test split, same pipeline; row 55 included as the diff() predecessor.
df_test_antalp = df['Antal produktionsmedarbejdere (t)']
df_test_antalp = df_test_antalp.iloc[55:]
df_test_antalp = np.log(df_test_antalp)
# CONSISTENCY FIX: the original reused the name 'val_periodisering' for this
# test slice; every sibling section calls it 'test_periodisering'. Renamed
# (purely local effect — both names are reassigned before their next use).
test_periodisering = periodic_estimate_antalp[8:12]
df_test_antalp = df_test_antalp - test_periodisering.values
df_test_antalp = df_test_antalp.diff(periods = 1)
df_test_antalp = df_test_antalp.dropna(axis=0)
df_test_antalp = (df_test_antalp-antalp_train_min)/(antalp_train_max-antalp_train_min)
df_test_antalp = pd.DataFrame(df_test_antalp, columns = ['Antal produktionsmedarbejdere (t)'])
# Working-days-minus-holiday ('Arbejdsdage - ferie') training series.
df_train_arbejdsdage = df_train['Arbejdsdage - ferie (t)']
# ADF: recorded p ~ 8.4e-05 -> already stationary, so only scaling is applied.
result = adfuller(df_train_arbejdsdage)
result[1]
8.418909248309158e-05
# Min-max scale with TRAINING extremes (reused for val/test below).
arbejdsdage_train_min = min(df_train_arbejdsdage)
arbejdsdage_train_max = max(df_train_arbejdsdage)
df_train_arbejdsdage = (df_train_arbejdsdage-arbejdsdage_train_min)/(arbejdsdage_train_max-arbejdsdage_train_min)
df_train_arbejdsdage = pd.DataFrame(df_train_arbejdsdage, columns = ['Arbejdsdage - ferie (t)'])
# Plot the scaled series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_arbejdsdage)
ax.set(xlabel="År og måned",
ylabel="Arbejdsdage - ferie",
title="Udvikling i arbejdsdage - ferie")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_arbejdsdage.index[0], 'Y')
datemax = np.datetime64(df_train_arbejdsdage.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Validation/test splits: scale only, with the TRAINING min/max.
df_val_arbejdsdage = df_val['Arbejdsdage - ferie (t)']
df_val_arbejdsdage = (df_val_arbejdsdage-arbejdsdage_train_min)/(arbejdsdage_train_max-arbejdsdage_train_min)
df_val_arbejdsdage = pd.DataFrame(df_val_arbejdsdage, columns = ['Arbejdsdage - ferie (t)'])
df_test_arbejdsdage = df_test['Arbejdsdage - ferie (t)']
df_test_arbejdsdage = (df_test_arbejdsdage-arbejdsdage_train_min)/(arbejdsdage_train_max-arbejdsdage_train_min)
df_test_arbejdsdage = pd.DataFrame(df_test_arbejdsdage, columns = ['Arbejdsdage - ferie (t)'])
# Billable-hours ('Fakturerbar tid') training series.
df_train_fakturerbartid = df_train['Fakturerbar tid (t)']
# ADF on the raw series: recorded p ~ 0.92 -> non-stationary.
result = adfuller(df_train_fakturerbartid)
result[1]
0.9242046478151071
# Plot the raw series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_fakturerbartid)
ax.set(xlabel="År og måned",
ylabel="Fakturerbar tid",
title="Udvikling i fakturerbar tid")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_fakturerbartid.index[0], 'Y')
datemax = np.datetime64(df_train_fakturerbartid.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Log-transform (variance stabilisation).
df_train_fakturerbartid = np.log(df_train_fakturerbartid)
# ADF after log: recorded p ~ 0.84 -> still non-stationary.
result = adfuller(df_train_fakturerbartid)
result[1]
0.837361022546167
# Plot the log-transformed series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_fakturerbartid)
ax.set(xlabel="År og måned",
ylabel="Fakturerbar tid",
title="Udvikling i fakturerbar tid")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_fakturerbartid.index[0], 'Y')
datemax = np.datetime64(df_train_fakturerbartid.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Estimate the seasonal component of the (log) billable-hours series.
# FIX: 'addidative' typo — statsmodels silently treated it as additive
# (it only checks for a leading 'm'); spelled correctly.
results = seasonal_decompose(df_train_fakturerbartid, model='additive')
periodic_estimate_fakturerbartid = results.seasonal
# Plot the estimated seasonal component.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(periodic_estimate_fakturerbartid)
ax.set(xlabel="År og måned",
ylabel="Fakturerbar tid",
title="Sæson effekt")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_fakturerbartid.index[0], 'Y')
datemax = np.datetime64(df_train_fakturerbartid.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Remove the seasonal component.
df_train_fakturerbartid = df_train_fakturerbartid-periodic_estimate_fakturerbartid
# ADF after deseasonalising: recorded p ~ 0.75 -> still non-stationary.
result = adfuller(df_train_fakturerbartid)
result[1]
0.7489928908044364
# Plot the deseasonalised (log) billable-hours series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_fakturerbartid)
ax.set(xlabel="År og måned",
ylabel="Fakturerbar tid",
title="Udvikling i fakturerbar tid")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_fakturerbartid.index[0], 'Y')
datemax = np.datetime64(df_train_fakturerbartid.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# First difference to remove the trend; dropna() removes the leading NaN.
df_train_fakturerbartid = df_train_fakturerbartid.diff(periods = 1)
df_train_fakturerbartid = df_train_fakturerbartid.dropna(axis=0)
# ADF after differencing: recorded p ~ 4e-13 -> stationary.
result = adfuller(df_train_fakturerbartid)
result[1]
3.98147103312809e-13
# Min-max scale with TRAINING extremes (reused for val/test; no leakage).
fakturerbartid_train_min = min(df_train_fakturerbartid)
fakturerbartid_train_max = max(df_train_fakturerbartid)
df_train_fakturerbartid = (df_train_fakturerbartid-fakturerbartid_train_min)/(fakturerbartid_train_max-fakturerbartid_train_min)
df_train_fakturerbartid = pd.DataFrame(df_train_fakturerbartid, columns = ['Fakturerbar tid (t)'])
# Plot the fully transformed billable-hours training series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_fakturerbartid)
ax.set(xlabel="År og måned",
ylabel="Fakturerbar tid",
title="Udvikling i Fakturerbar tid")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_fakturerbartid.index[0], 'Y')
datemax = np.datetime64(df_train_fakturerbartid.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Validation split: same transforms as training (log, deseasonalise,
# difference, train-based scaling). Row 47 included so diff() has a predecessor.
df_val_fakturerbartid = df['Fakturerbar tid (t)']
df_val_fakturerbartid = df_val_fakturerbartid.iloc[47:56]
df_val_fakturerbartid = np.log(df_val_fakturerbartid)
# Seasonal slice presumably aligned to rows 47..55 — TODO confirm alignment.
val_periodisering = periodic_estimate_fakturerbartid[11:20]
df_val_fakturerbartid = df_val_fakturerbartid - val_periodisering.values
df_val_fakturerbartid = df_val_fakturerbartid.diff(periods = 1)
df_val_fakturerbartid = df_val_fakturerbartid.dropna(axis=0)
df_val_fakturerbartid = (df_val_fakturerbartid-fakturerbartid_train_min)/(fakturerbartid_train_max-fakturerbartid_train_min)
df_val_fakturerbartid = pd.DataFrame(df_val_fakturerbartid, columns = ['Fakturerbar tid (t)'])
# Test split, same pipeline; row 55 included as the diff() predecessor.
df_test_fakturerbartid = df['Fakturerbar tid (t)']
df_test_fakturerbartid = df_test_fakturerbartid.iloc[55:]
df_test_fakturerbartid = np.log(df_test_fakturerbartid)
test_periodisering = periodic_estimate_fakturerbartid[8:12]
df_test_fakturerbartid = df_test_fakturerbartid - test_periodisering.values
df_test_fakturerbartid = df_test_fakturerbartid.diff(periods = 1)
df_test_fakturerbartid = df_test_fakturerbartid.dropna(axis=0)
df_test_fakturerbartid = (df_test_fakturerbartid-fakturerbartid_train_min)/(fakturerbartid_train_max-fakturerbartid_train_min)
df_test_fakturerbartid = pd.DataFrame(df_test_fakturerbartid, columns = ['Fakturerbar tid (t)'])
# Job-hours ('Jobtimer') training series.
df_train_jobtimer = df_train['Jobtimer (t)']
# ADF on the raw series: recorded p ~ 0.90 -> non-stationary.
result = adfuller(df_train_jobtimer)
result[1]
0.9005892686087571
# Plot the raw series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_jobtimer)
ax.set(xlabel="År og måned",
ylabel="Jobtimer",
title="Udvikling i jobtimer")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_jobtimer.index[0], 'Y')
datemax = np.datetime64(df_train_jobtimer.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Estimate the seasonal component of the job-hours series.
# FIX: 'addidative' typo — statsmodels silently treated it as additive
# (it only checks for a leading 'm'); spelled correctly.
results = seasonal_decompose(df_train_jobtimer, model='additive')
periodic_estimate_jobtimer = results.seasonal
# Plot the estimated seasonal component.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(periodic_estimate_jobtimer)
ax.set(xlabel="År og måned",
ylabel="Jobtimer",
title="Sæson effekt")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_jobtimer.index[0], 'Y')
datemax = np.datetime64(df_train_jobtimer.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Remove the seasonal component.
df_train_jobtimer = df_train_jobtimer-periodic_estimate_jobtimer
# ADF after deseasonalising: recorded p ~ 0.96 -> still non-stationary.
result = adfuller(df_train_jobtimer)
result[1]
0.9640269789920451
# Plot the deseasonalised job-hours series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_jobtimer)
ax.set(xlabel="År og måned",
ylabel="Jobtimer",
title="Udvikling i jobtimer")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_jobtimer.index[0], 'Y')
datemax = np.datetime64(df_train_jobtimer.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# First difference; dropna() removes the leading NaN. (The original's second
# dropna() was a redundant no-op and has been removed.)
df_train_jobtimer = df_train_jobtimer.diff(periods = 1).dropna(axis = 0)
# NOTE(review): recorded ADF p ~ 0.33 — this series is STILL non-stationary
# after deseasonalising + one difference, unlike every other series. Consider
# a second difference or a log transform; confirm downstream model assumptions.
result = adfuller(df_train_jobtimer)
result[1]
0.32967546705757433
# Min-max scale with TRAINING extremes (reused for val/test; no leakage).
jobtimer_train_min = min(df_train_jobtimer)
jobtimer_train_max = max(df_train_jobtimer)
df_train_jobtimer = (df_train_jobtimer-jobtimer_train_min)/(jobtimer_train_max-jobtimer_train_min)
df_train_jobtimer = pd.DataFrame(df_train_jobtimer, columns = ['Jobtimer (t)'])
# Plot the fully transformed job-hours training series.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train_jobtimer)
ax.set(xlabel="År og måned",
ylabel="Jobtimer",
title="Udvikling i Jobtimer")
fmt_quarter_year = mdates.MonthLocator(interval=3)
ax.xaxis.set_major_locator(fmt_quarter_year)
fmt_month = mdates.MonthLocator()
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
datemin = np.datetime64(df_train_jobtimer.index[0], 'Y')
datemax = np.datetime64(df_train_jobtimer.index[-1],'Y') + np.timedelta64(1, 'Y')
ax.set_xlim(datemin, datemax)
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
fig.autofmt_xdate()
plt.show()
# Validation split: same transforms as training; row 47 included so diff()
# has a predecessor for the first val month.
df_val_jobtimer = df['Jobtimer (t)']
df_val_jobtimer = df_val_jobtimer.iloc[47:56]
# Seasonal slice presumably aligned to rows 47..55 — TODO confirm alignment.
val_periodisering = periodic_estimate_jobtimer[11:20]
df_val_jobtimer = df_val_jobtimer - val_periodisering.values
df_val_jobtimer = df_val_jobtimer.diff(periods = 1)
df_val_jobtimer = df_val_jobtimer.dropna(axis=0)
df_val_jobtimer = (df_val_jobtimer-jobtimer_train_min)/(jobtimer_train_max-jobtimer_train_min)
df_val_jobtimer = pd.DataFrame(df_val_jobtimer, columns = ['Jobtimer (t)'])
# Test split, same pipeline; row 55 included as the diff() predecessor.
df_test_jobtimer = df['Jobtimer (t)']
df_test_jobtimer = df_test_jobtimer.iloc[55:]
test_periodisering = periodic_estimate_jobtimer[8:12]
df_test_jobtimer = df_test_jobtimer - test_periodisering.values
df_test_jobtimer = df_test_jobtimer.diff(periods = 1)
df_test_jobtimer = df_test_jobtimer.dropna(axis=0)
df_test_jobtimer = (df_test_jobtimer-jobtimer_train_min)/(jobtimer_train_max-jobtimer_train_min)
df_test_jobtimer = pd.DataFrame(df_test_jobtimer, columns = ['Jobtimer (t)'])
# Keep the previous df_train binding around, then rebuild df_train by
# joining all preprocessed training KPI frames on their date index.
df_train_copy = df_train
df_train = df_train_omsætning
for _frame in (df_train_vareforbrug, df_train_lønforbrug,
               df_train_faktureringsgrad, df_train_antalp,
               df_train_arbejdsdage, df_train_fakturerbartid,
               df_train_jobtimer):
    df_train = df_train.merge(_frame, left_index=True, right_index=True)
# Same construction for the validation split: join every preprocessed
# validation KPI frame on the date index.
df_val_copy = df_val
df_val = df_val_omsætning
for _frame in (df_val_vareforbrug, df_val_lønforbrug,
               df_val_faktureringsgrad, df_val_antalp,
               df_val_arbejdsdage, df_val_fakturerbartid,
               df_val_jobtimer):
    df_val = df_val.merge(_frame, left_index=True, right_index=True)
# And for the test split: join every preprocessed test KPI frame.
df_test_copy = df_test
df_test = df_test_omsætning
for _frame in (df_test_vareforbrug, df_test_lønforbrug,
               df_test_faktureringsgrad, df_test_antalp,
               df_test_arbejdsdage, df_test_fakturerbartid,
               df_test_jobtimer):
    df_test = df_test.merge(_frame, left_index=True, right_index=True)
# Feature table for the KPI models: current values plus one-month lags
# and calendar dummies.
# Use an explicit copy — the original `df_train_kpi = df_train` was an
# alias, so adding the lag columns silently mutated df_train as well
# (that aliasing is also what triggers pandas' chained-assignment
# warning silenced below).
df_train_kpi = df_train.copy()
for _col in ['Omsætning (t)', 'Vareforbrug (t)', 'Lønforbrug (t)',
             'Faktureringsgrad (t)', 'Antal produktionsmedarbejdere (t)',
             'Arbejdsdage - ferie (t)', 'Fakturerbar tid (t)', 'Jobtimer (t)']:
    # 'X (t)' -> 'X (t-1)': the KPI's value in the previous month.
    df_train_kpi[_col.replace('(t)', '(t-1)')] = df_train_kpi[_col].shift(1)
# The first row has no previous month, so all its lags are NaN -> drop.
df_train_kpi = df_train_kpi.dropna(axis=0)
pd.options.mode.chained_assignment = None  # silence SettingWithCopyWarning
# Calendar features: year plus one dummy per month; January is the
# reference category and is dropped to avoid the dummy trap.
df_train_kpi['År (t)'] = df_train_kpi.index.year
df_train_kpi['Måned (t)'] = df_train_kpi.index.month
dummies = pd.get_dummies(df_train_kpi['Måned (t)'])
df_train_kpi = df_train_kpi.drop('Måned (t)', axis=1)
dummies = dummies.drop(1, axis=1)
# NOTE(review): assumes all 12 months occur in the training data;
# otherwise this positional rename mislabels the dummy columns.
dummies.columns = ['Februar (t)', 'Marts (t)', 'April (t)', 'Maj (t)',
                   'Juni (t)', 'Juli (t)', 'August (t)', 'September (t)',
                   'Oktober (t)', 'November (t)', 'December (t)']
df_train_kpi = df_train_kpi.join(dummies)
# Month dummies for the full date range of df; reused when joining
# calendar features onto the validation and test variable frames.
df_monthly_dummies = pd.DataFrame(index=df.index)
df_monthly_dummies['Måned (t)'] = df_monthly_dummies.index.month
dummies = pd.get_dummies(df_monthly_dummies['Måned (t)'])
df_monthly_dummies = df_monthly_dummies.drop('Måned (t)', axis=1)
# January is the reference month, hence column 1 is dropped.
dummies = dummies.drop(1, axis=1)
dummies.columns = ['Februar (t)', 'Marts (t)', 'April (t)', 'Maj (t)',
                   'Juni (t)', 'Juli (t)', 'August (t)', 'September (t)',
                   'Oktober (t)', 'November (t)', 'December (t)']
df_monthly_dummies = df_monthly_dummies.join(dummies)
# Candidate features for the Omsætning (revenue) model: drop the
# ruled-out lags, then inspect correlations with the target.
df_train_kpi_omsætning = df_train_kpi.drop(
    columns=['Arbejdsdage - ferie (t-1)', 'Faktureringsgrad (t-1)',
             'Antal produktionsmedarbejdere (t-1)'])
df_train_kpi_omsætning.corr()['Omsætning (t)'].sort_values()
Vareforbrug (t-1) -0 Omsætning (t-1) -0 Fakturerbar tid (t-1) -0 Jobtimer (t-1) -0 August (t) -0 Lønforbrug (t-1) -0 Maj (t) -0 April (t) -0 December (t) -0 November (t) -0 Juni (t) -0 Februar (t) 0 Faktureringsgrad (t) 0 Oktober (t) 0 År (t) 0 September (t) 0 Juli (t) 0 Marts (t) 0 Antal produktionsmedarbejdere (t) 0 Arbejdsdage - ferie (t) 0 Lønforbrug (t) 0 Fakturerbar tid (t) 0 Jobtimer (t) 0 Vareforbrug (t) 1 Omsætning (t) 1 Name: Omsætning (t), dtype: float64
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
# Rank candidate predictors for Omsætning with a random forest.
Y_Omsætning = df_train_kpi_omsætning['Omsætning (t)']
X_Omsætning = df_train_kpi_omsætning.drop('Omsætning (t)', axis=1)
# 'Fakturerbar tid' is excluded from this candidate set.
X_Omsætning = X_Omsætning.drop(['Fakturerbar tid (t)', 'Fakturerbar tid (t-1)'], axis=1)
# random_state fixed so the importance ranking is reproducible between
# runs (seed 123 matches the PyCaret session_id used below).
rf = RandomForestRegressor(n_estimators=1000, random_state=123)
rf.fit(X_Omsætning, Y_Omsætning)
sorted_idx = rf.feature_importances_.argsort()
plt.barh(X_Omsætning.columns[sorted_idx], rf.feature_importances_[sorted_idx])
plt.xlabel("Random Forest Feature Importance")
plt.show()
Valgte kpi'er:
Vareforbrug (t)
Vareforbrug (t-1)
Omsætning (t-1)
Jobtimer (t)
# Training features for the Omsætning model, selected from the
# random-forest importances above.
df_train_omsætning_variables = df_train_kpi[['Omsætning (t)', 'Vareforbrug (t)', 'Vareforbrug (t-1)',
                                             'Omsætning (t-1)', 'Jobtimer (t)']]

# Columns needed to rebuild the (t-1) lags on the other splits.
_omsætning_cols = ['Omsætning (t)', 'Vareforbrug (t)', 'Jobtimer (t)']

# Validation frame: prepend the last training row so the first
# validation month has defined (t-1) lags; the seed row's NaN lags are
# dropped afterwards. DataFrame.append was deprecated in pandas 1.4
# and removed in 2.0, so pd.concat is used instead.
df_val_omsætning_variables1 = pd.DataFrame(df_train[_omsætning_cols].iloc[-1]).transpose()
df_val_omsætning_variables2 = pd.DataFrame(df_val[_omsætning_cols])
df_val_omsætning_variables = pd.concat([df_val_omsætning_variables1, df_val_omsætning_variables2])
df_val_omsætning_variables['Omsætning (t-1)'] = df_val_omsætning_variables['Omsætning (t)'].shift(1)
df_val_omsætning_variables['Vareforbrug (t-1)'] = df_val_omsætning_variables['Vareforbrug (t)'].shift(1)
df_val_omsætning_variables = df_val_omsætning_variables.dropna(axis=0)

# Test frame: same construction, seeded with the last validation row.
df_test_omsætning_variables1 = pd.DataFrame(df_val[_omsætning_cols].iloc[-1]).transpose()
df_test_omsætning_variables2 = pd.DataFrame(df_test[_omsætning_cols])
df_test_omsætning_variables = pd.concat([df_test_omsætning_variables1, df_test_omsætning_variables2])
df_test_omsætning_variables['Omsætning (t-1)'] = df_test_omsætning_variables['Omsætning (t)'].shift(1)
df_test_omsætning_variables['Vareforbrug (t-1)'] = df_test_omsætning_variables['Vareforbrug (t)'].shift(1)
df_test_omsætning_variables = df_test_omsætning_variables.dropna(axis=0)
# Candidate features for the Vareforbrug model: exclude the revenue
# target and the ruled-out lags, then inspect correlations.
df_train_kpi_vareforbrug = df_train_kpi.drop(
    columns=['Omsætning (t)', 'Arbejdsdage - ferie (t-1)',
             'Faktureringsgrad (t-1)', 'Antal produktionsmedarbejdere (t-1)'])
df_train_kpi_vareforbrug.corr()['Vareforbrug (t)'].sort_values()
Vareforbrug (t-1) -0 Omsætning (t-1) -0 Jobtimer (t-1) -0 August (t) -0 Fakturerbar tid (t-1) -0 December (t) -0 Lønforbrug (t-1) -0 April (t) -0 Maj (t) -0 Oktober (t) -0 Juni (t) -0 Faktureringsgrad (t) -0 Februar (t) 0 År (t) 0 November (t) 0 September (t) 0 Arbejdsdage - ferie (t) 0 Marts (t) 0 Antal produktionsmedarbejdere (t) 0 Juli (t) 0 Fakturerbar tid (t) 0 Lønforbrug (t) 0 Jobtimer (t) 0 Vareforbrug (t) 1 Name: Vareforbrug (t), dtype: float64
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
# Rank candidate predictors for Vareforbrug with a random forest.
Y_Vareforbrug = df_train_kpi_vareforbrug['Vareforbrug (t)']
X_Vareforbrug = df_train_kpi_vareforbrug.drop('Vareforbrug (t)', axis=1)
X_Vareforbrug = X_Vareforbrug.drop(['Fakturerbar tid (t)', 'Fakturerbar tid (t-1)'], axis=1)
# random_state fixed for reproducible importances (seed 123 matches
# the PyCaret session_id used below).
rf = RandomForestRegressor(n_estimators=1000, random_state=123)
rf.fit(X_Vareforbrug, Y_Vareforbrug)
sorted_idx = rf.feature_importances_.argsort()
plt.barh(X_Vareforbrug.columns[sorted_idx], rf.feature_importances_[sorted_idx])
plt.xlabel("Random Forest Feature Importance")
plt.show()
Valgte kpi'er:
Vareforbrug (t-1)
Omsætning (t-1)
Antal produktionsmedarbejdere (t)
Jobtimer (t)
Lønforbrug (t)
# Training features for the Vareforbrug model, selected from the
# random-forest importances above.
df_train_vareforbrug_variables = df_train_kpi[['Vareforbrug (t)', 'Vareforbrug (t-1)', 'Omsætning (t-1)',
                                               'Antal produktionsmedarbejdere (t)', 'Jobtimer (t)', 'Lønforbrug (t)']]

# Columns needed to rebuild the (t-1) lags on the other splits.
_vareforbrug_cols = ['Vareforbrug (t)', 'Omsætning (t)',
                     'Antal produktionsmedarbejdere (t)',
                     'Jobtimer (t)', 'Lønforbrug (t)']

# Validation frame seeded with the last training row so the first
# month's lags exist; pd.concat replaces DataFrame.append (removed in
# pandas 2.0).
df_val_vareforbrug_variables1 = pd.DataFrame(df_train[_vareforbrug_cols].iloc[-1]).transpose()
df_val_vareforbrug_variables2 = pd.DataFrame(df_val[_vareforbrug_cols])
df_val_vareforbrug_variables = pd.concat([df_val_vareforbrug_variables1, df_val_vareforbrug_variables2])
df_val_vareforbrug_variables['Vareforbrug (t-1)'] = df_val_vareforbrug_variables['Vareforbrug (t)'].shift(1)
df_val_vareforbrug_variables['Omsætning (t-1)'] = df_val_vareforbrug_variables['Omsætning (t)'].shift(1)
df_val_vareforbrug_variables = df_val_vareforbrug_variables.dropna(axis=0)

# Test frame: same construction, seeded with the last validation row.
df_test_vareforbrug_variables1 = pd.DataFrame(df_val[_vareforbrug_cols].iloc[-1]).transpose()
df_test_vareforbrug_variables2 = pd.DataFrame(df_test[_vareforbrug_cols])
df_test_vareforbrug_variables = pd.concat([df_test_vareforbrug_variables1, df_test_vareforbrug_variables2])
df_test_vareforbrug_variables['Vareforbrug (t-1)'] = df_test_vareforbrug_variables['Vareforbrug (t)'].shift(1)
df_test_vareforbrug_variables['Omsætning (t-1)'] = df_test_vareforbrug_variables['Omsætning (t)'].shift(1)
df_test_vareforbrug_variables = df_test_vareforbrug_variables.dropna(axis=0)
# Candidate features for the Lønforbrug model.
df_train_kpi_lønforbrug = df_train_kpi.drop(
    columns=['Vareforbrug (t)', 'Omsætning (t)', 'Arbejdsdage - ferie (t-1)',
             'Faktureringsgrad (t-1)', 'Antal produktionsmedarbejdere (t-1)'])
df_train_kpi_lønforbrug.corr()['Lønforbrug (t)'].sort_values()
Lønforbrug (t-1) -0 Vareforbrug (t-1) -0 Jobtimer (t-1) -0 September (t) -0 Omsætning (t-1) -0 Oktober (t) -0 August (t) -0 Fakturerbar tid (t-1) -0 Faktureringsgrad (t) -0 Maj (t) -0 Arbejdsdage - ferie (t) 0 April (t) 0 År (t) 0 Juli (t) 0 Antal produktionsmedarbejdere (t) 0 Marts (t) 0 Juni (t) 0 November (t) 0 Februar (t) 0 December (t) 0 Fakturerbar tid (t) 0 Jobtimer (t) 1 Lønforbrug (t) 1 Name: Lønforbrug (t), dtype: float64
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
# Rank candidate predictors for Lønforbrug with a random forest.
Y_Lønforbrug = df_train_kpi_lønforbrug['Lønforbrug (t)']
X_Lønforbrug = df_train_kpi_lønforbrug.drop('Lønforbrug (t)', axis=1)
X_Lønforbrug = X_Lønforbrug.drop(['Fakturerbar tid (t)', 'Fakturerbar tid (t-1)'], axis=1)
# random_state fixed for reproducible importances (seed 123 matches
# the PyCaret session_id used below).
rf = RandomForestRegressor(n_estimators=1000, random_state=123)
rf.fit(X_Lønforbrug, Y_Lønforbrug)
sorted_idx = rf.feature_importances_.argsort()
plt.barh(X_Lønforbrug.columns[sorted_idx], rf.feature_importances_[sorted_idx])
plt.xlabel("Random Forest Feature Importance")
plt.show()
Valgte kpi'er:
Jobtimer (t)
Lønforbrug (t-1)
Arbejdsdage - ferie (t)
Antal produktionsmedarbejdere (t)
# Training features for the Lønforbrug model.
df_train_lønforbrug_variables = df_train_kpi[['Lønforbrug (t)', 'Jobtimer (t)', 'Lønforbrug (t-1)',
                                              'Arbejdsdage - ferie (t)', 'Antal produktionsmedarbejdere (t)']]

_lønforbrug_cols = ['Lønforbrug (t)', 'Jobtimer (t)', 'Arbejdsdage - ferie (t)',
                    'Antal produktionsmedarbejdere (t)']

# Validation frame seeded with the last training row so the first
# month's (t-1) lag exists; pd.concat replaces DataFrame.append
# (removed in pandas 2.0).
df_val_lønforbrug_variables1 = pd.DataFrame(df_train[_lønforbrug_cols].iloc[-1]).transpose()
df_val_lønforbrug_variables2 = pd.DataFrame(df_val[_lønforbrug_cols])
df_val_lønforbrug_variables = pd.concat([df_val_lønforbrug_variables1, df_val_lønforbrug_variables2])
df_val_lønforbrug_variables['Lønforbrug (t-1)'] = df_val_lønforbrug_variables['Lønforbrug (t)'].shift(1)
df_val_lønforbrug_variables = df_val_lønforbrug_variables.dropna(axis=0)

# Test frame seeded with the last validation row.
df_test_lønforbrug_variables1 = pd.DataFrame(df_val[_lønforbrug_cols].iloc[-1]).transpose()
df_test_lønforbrug_variables2 = pd.DataFrame(df_test[_lønforbrug_cols])
df_test_lønforbrug_variables = pd.concat([df_test_lønforbrug_variables1, df_test_lønforbrug_variables2])
df_test_lønforbrug_variables['Lønforbrug (t-1)'] = df_test_lønforbrug_variables['Lønforbrug (t)'].shift(1)
df_test_lønforbrug_variables = df_test_lønforbrug_variables.dropna(axis=0)
# Candidate features for the Faktureringsgrad model.
df_train_kpi_faktureringsgrad = df_train_kpi.drop(
    columns=['Vareforbrug (t)', 'Omsætning (t)', 'Arbejdsdage - ferie (t-1)',
             'Lønforbrug (t)', 'Antal produktionsmedarbejdere (t-1)',
             'Jobtimer (t)'])
df_train_kpi_faktureringsgrad.corr()['Faktureringsgrad (t)'].sort_values()
Februar (t) -0 Arbejdsdage - ferie (t) -0 September (t) -0 December (t) -0 Oktober (t) -0 Marts (t) -0 Fakturerbar tid (t) -0 November (t) 0 Antal produktionsmedarbejdere (t) 0 Maj (t) 0 Jobtimer (t-1) 0 August (t) 0 Fakturerbar tid (t-1) 0 Omsætning (t-1) 0 Juni (t) 0 Vareforbrug (t-1) 0 År (t) 0 Juli (t) 0 Lønforbrug (t-1) 0 April (t) 0 Faktureringsgrad (t-1) 0 Faktureringsgrad (t) 1 Name: Faktureringsgrad (t), dtype: float64
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
# Rank candidate predictors for Faktureringsgrad with a random forest.
Y_Faktureringsgrad = df_train_kpi_faktureringsgrad['Faktureringsgrad (t)']
X_Faktureringsgrad = df_train_kpi_faktureringsgrad.drop('Faktureringsgrad (t)', axis=1)
# random_state fixed for reproducible importances (seed 123 matches
# the PyCaret session_id used below).
rf = RandomForestRegressor(n_estimators=1000, random_state=123)
rf.fit(X_Faktureringsgrad, Y_Faktureringsgrad)
sorted_idx = rf.feature_importances_.argsort()
plt.barh(X_Faktureringsgrad.columns[sorted_idx], rf.feature_importances_[sorted_idx])
plt.xlabel("Random Forest Feature Importance")
plt.show()
Valgte kpi'er:
Faktureringsgrad (t-1)
Måned (t)
Arbejdsdage - ferie (t)
# Training features for the Faktureringsgrad model: its own lag,
# working days, and the month dummies.
df_train_faktureringsgrad_variables = df_train_kpi[['Faktureringsgrad (t)', 'Faktureringsgrad (t-1)',
                                                    'Arbejdsdage - ferie (t)',
                                                    'Februar (t)', 'Marts (t)', 'April (t)', 'Maj (t)',
                                                    'Juni (t)', 'Juli (t)', 'August (t)', 'September (t)',
                                                    'Oktober (t)', 'November (t)', 'December (t)']]

_faktureringsgrad_cols = ['Faktureringsgrad (t)', 'Arbejdsdage - ferie (t)']

# Validation frame seeded with the last training row; pd.concat
# replaces DataFrame.append (removed in pandas 2.0). Month dummies are
# merged in from the precomputed df_monthly_dummies.
df_val_faktureringsgrad_variables1 = pd.DataFrame(df_train[_faktureringsgrad_cols].iloc[-1]).transpose()
df_val_faktureringsgrad_variables2 = pd.DataFrame(df_val[_faktureringsgrad_cols])
df_val_faktureringsgrad_variables = pd.concat([df_val_faktureringsgrad_variables1, df_val_faktureringsgrad_variables2])
df_val_faktureringsgrad_variables['Faktureringsgrad (t-1)'] = df_val_faktureringsgrad_variables['Faktureringsgrad (t)'].shift(1)
df_val_faktureringsgrad_variables = df_val_faktureringsgrad_variables.dropna(axis=0)
df_val_faktureringsgrad_variables = df_val_faktureringsgrad_variables.merge(df_monthly_dummies,
                                                                            left_index=True, right_index=True)

# Test frame seeded with the last validation row.
df_test_faktureringsgrad_variables1 = pd.DataFrame(df_val[_faktureringsgrad_cols].iloc[-1]).transpose()
df_test_faktureringsgrad_variables2 = pd.DataFrame(df_test[_faktureringsgrad_cols])
df_test_faktureringsgrad_variables = pd.concat([df_test_faktureringsgrad_variables1, df_test_faktureringsgrad_variables2])
df_test_faktureringsgrad_variables['Faktureringsgrad (t-1)'] = df_test_faktureringsgrad_variables['Faktureringsgrad (t)'].shift(1)
df_test_faktureringsgrad_variables = df_test_faktureringsgrad_variables.dropna(axis=0)
df_test_faktureringsgrad_variables = df_test_faktureringsgrad_variables.merge(df_monthly_dummies,
                                                                              left_index=True, right_index=True)
# Candidate features for the Fakturerbar tid model.
df_train_kpi_fakturerbartid = df_train_kpi.drop(
    columns=['Vareforbrug (t)', 'Omsætning (t)', 'Arbejdsdage - ferie (t-1)',
             'Lønforbrug (t)', 'Antal produktionsmedarbejdere (t-1)',
             'Jobtimer (t)', 'Faktureringsgrad (t)'])
df_train_kpi_fakturerbartid.corr()['Fakturerbar tid (t)'].sort_values()
Jobtimer (t-1) -0 Fakturerbar tid (t-1) -0 April (t) -0 Omsætning (t-1) -0 Vareforbrug (t-1) -0 August (t) -0 Lønforbrug (t-1) -0 November (t) -0 Oktober (t) -0 Juni (t) -0 Februar (t) -0 Juli (t) 0 Faktureringsgrad (t-1) 0 Maj (t) 0 September (t) 0 December (t) 0 År (t) 0 Marts (t) 0 Antal produktionsmedarbejdere (t) 0 Arbejdsdage - ferie (t) 0 Fakturerbar tid (t) 1 Name: Fakturerbar tid (t), dtype: float64
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
# Rank candidate predictors for Fakturerbar tid with a random forest.
Y_Fakturerbartid = df_train_kpi_fakturerbartid['Fakturerbar tid (t)']
X_Fakturerbartid = df_train_kpi_fakturerbartid.drop('Fakturerbar tid (t)', axis=1)
# random_state fixed for reproducible importances (seed 123 matches
# the PyCaret session_id used below).
rf = RandomForestRegressor(n_estimators=1000, random_state=123)
rf.fit(X_Fakturerbartid, Y_Fakturerbartid)
sorted_idx = rf.feature_importances_.argsort()
plt.barh(X_Fakturerbartid.columns[sorted_idx], rf.feature_importances_[sorted_idx])
plt.xlabel("Random Forest Feature Importance")
plt.show()
Valgte kpi'er:
Antal produktionsmedarbejdere (t)
Arbejdsdage - ferie (t)
Fakturerbar tid (t-1)
# Training features for the Fakturerbar tid model.
df_train_fakturerbartid_variables = df_train_kpi[['Fakturerbar tid (t)', 'Antal produktionsmedarbejdere (t)',
                                                  'Arbejdsdage - ferie (t)', 'Fakturerbar tid (t-1)']]

_fakturerbartid_cols = ['Fakturerbar tid (t)', 'Antal produktionsmedarbejdere (t)',
                        'Arbejdsdage - ferie (t)']

# Validation frame seeded with the last training row so the first
# month's (t-1) lag exists; pd.concat replaces DataFrame.append
# (removed in pandas 2.0).
df_val_fakturerbartid_variables1 = pd.DataFrame(df_train[_fakturerbartid_cols].iloc[-1]).transpose()
df_val_fakturerbartid_variables2 = pd.DataFrame(df_val[_fakturerbartid_cols])
df_val_fakturerbartid_variables = pd.concat([df_val_fakturerbartid_variables1, df_val_fakturerbartid_variables2])
df_val_fakturerbartid_variables['Fakturerbar tid (t-1)'] = df_val_fakturerbartid_variables['Fakturerbar tid (t)'].shift(1)
df_val_fakturerbartid_variables = df_val_fakturerbartid_variables.dropna(axis=0)

# Test frame seeded with the last validation row.
df_test_fakturerbartid_variables1 = pd.DataFrame(df_val[_fakturerbartid_cols].iloc[-1]).transpose()
df_test_fakturerbartid_variables2 = pd.DataFrame(df_test[_fakturerbartid_cols])
df_test_fakturerbartid_variables = pd.concat([df_test_fakturerbartid_variables1, df_test_fakturerbartid_variables2])
df_test_fakturerbartid_variables['Fakturerbar tid (t-1)'] = df_test_fakturerbartid_variables['Fakturerbar tid (t)'].shift(1)
df_test_fakturerbartid_variables = df_test_fakturerbartid_variables.dropna(axis=0)
# Candidate features for the headcount (Antal produktionsmedarbejdere) model.
df_train_kpi_antalp = df_train_kpi.drop(
    columns=['Vareforbrug (t)', 'Omsætning (t)', 'Arbejdsdage - ferie (t-1)',
             'Lønforbrug (t)', 'Fakturerbar tid (t)', 'Jobtimer (t)',
             'Faktureringsgrad (t)'])
df_train_kpi_antalp.corr()['Antal produktionsmedarbejdere (t)'].sort_values()
August (t) -0 Arbejdsdage - ferie (t) -0 Juni (t) -0 September (t) -0 Februar (t) -0 Oktober (t) -0 Marts (t) -0 November (t) 0 Maj (t) 0 December (t) 0 April (t) 0 Vareforbrug (t-1) 0 Faktureringsgrad (t-1) 0 Lønforbrug (t-1) 0 Omsætning (t-1) 0 Fakturerbar tid (t-1) 0 År (t) 0 Jobtimer (t-1) 0 Juli (t) 0 Antal produktionsmedarbejdere (t-1) 0 Antal produktionsmedarbejdere (t) 1 Name: Antal produktionsmedarbejdere (t), dtype: float64
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
# Rank candidate predictors for the headcount model with a random forest.
Y_Antalp = df_train_kpi_antalp['Antal produktionsmedarbejdere (t)']
X_Antalp = df_train_kpi_antalp.drop('Antal produktionsmedarbejdere (t)', axis=1)
# random_state fixed for reproducible importances (seed 123 matches
# the PyCaret session_id used below).
rf = RandomForestRegressor(n_estimators=1000, random_state=123)
rf.fit(X_Antalp, Y_Antalp)
sorted_idx = rf.feature_importances_.argsort()
plt.barh(X_Antalp.columns[sorted_idx], rf.feature_importances_[sorted_idx])
plt.xlabel("Random Forest Feature Importance")
plt.show()
Valgte kpi'er:
Antal produktionsmedarbejdere (t-1)
Måned (t)
# Training features for the headcount model: its own lag plus month dummies.
df_train_antalp_variables = df_train_kpi[['Antal produktionsmedarbejdere (t)', 'Antal produktionsmedarbejdere (t-1)',
                                          'Februar (t)', 'Marts (t)', 'April (t)', 'Maj (t)', 'Juni (t)', 'Juli (t)',
                                          'August (t)', 'September (t)', 'Oktober (t)', 'November (t)', 'December (t)']]

# Validation frame seeded with the last training row; pd.concat
# replaces DataFrame.append (removed in pandas 2.0). Month dummies are
# merged in from the precomputed df_monthly_dummies.
df_val_antalp_variables1 = pd.DataFrame(df_train[['Antal produktionsmedarbejdere (t)']].iloc[-1]).transpose()
df_val_antalp_variables2 = pd.DataFrame(df_val[['Antal produktionsmedarbejdere (t)']])
df_val_antalp_variables = pd.concat([df_val_antalp_variables1, df_val_antalp_variables2])
df_val_antalp_variables['Antal produktionsmedarbejdere (t-1)'] = df_val_antalp_variables['Antal produktionsmedarbejdere (t)'].shift(1)
df_val_antalp_variables = df_val_antalp_variables.dropna(axis=0)
df_val_antalp_variables = df_val_antalp_variables.merge(df_monthly_dummies, left_index=True, right_index=True)

# Test frame seeded with the last validation row.
df_test_antalp_variables1 = pd.DataFrame(df_val[['Antal produktionsmedarbejdere (t)']].iloc[-1]).transpose()
df_test_antalp_variables2 = pd.DataFrame(df_test[['Antal produktionsmedarbejdere (t)']])
df_test_antalp_variables = pd.concat([df_test_antalp_variables1, df_test_antalp_variables2])
df_test_antalp_variables['Antal produktionsmedarbejdere (t-1)'] = df_test_antalp_variables['Antal produktionsmedarbejdere (t)'].shift(1)
df_test_antalp_variables = df_test_antalp_variables.dropna(axis=0)
df_test_antalp_variables = df_test_antalp_variables.merge(df_monthly_dummies, left_index=True, right_index=True)
from pycaret.regression import *
from sklearn.metrics import mean_squared_error
# PyCaret experiment for the Omsætning model: train_size=1.0 with an
# explicit test_data means the full training frame is used for fitting
# and the validation frame is the hold-out; CV folds respect time order.
setup_omsætning = setup(data = df_train_omsætning_variables,
train_size = 1.0,
test_data = df_val_omsætning_variables,
target = 'Omsætning (t)',
fold_strategy = 'timeseries',
normalize = False,
transform_target = False,
session_id=123)
| Description | Value | |
|---|---|---|
| 0 | session_id | 123 |
| 1 | Target | Omsætning (t) |
| 2 | Original Data | (46, 5) |
| 3 | Missing Values | False |
| 4 | Numeric Features | 4 |
| 5 | Categorical Features | 0 |
| 6 | Ordinal Features | False |
| 7 | High Cardinality Features | False |
| 8 | High Cardinality Method | None |
| 9 | Transformed Train Set | (46, 4) |
| 10 | Transformed Test Set | (8, 4) |
| 11 | Shuffle Train-Test | True |
| 12 | Stratify Train-Test | False |
| 13 | Fold Generator | TimeSeriesSplit |
| 14 | Fold Number | 10 |
| 15 | CPU Jobs | -1 |
| 16 | Use GPU | False |
| 17 | Log Experiment | False |
| 18 | Experiment Name | reg-default-name |
| 19 | USI | 58ed |
| 20 | Imputation Type | simple |
| 21 | Iterative Imputation Iteration | None |
| 22 | Numeric Imputer | mean |
| 23 | Iterative Imputation Numeric Model | None |
| 24 | Categorical Imputer | constant |
| 25 | Iterative Imputation Categorical Model | None |
| 26 | Unknown Categoricals Handling | least_frequent |
| 27 | Normalize | False |
| 28 | Normalize Method | None |
| 29 | Transformation | False |
| 30 | Transformation Method | None |
| 31 | PCA | False |
| 32 | PCA Method | None |
| 33 | PCA Components | None |
| 34 | Ignore Low Variance | False |
| 35 | Combine Rare Levels | False |
| 36 | Rare Level Threshold | None |
| 37 | Numeric Binning | False |
| 38 | Remove Outliers | False |
| 39 | Outliers Threshold | None |
| 40 | Remove Multicollinearity | False |
| 41 | Multicollinearity Threshold | None |
| 42 | Remove Perfect Collinearity | True |
| 43 | Clustering | False |
| 44 | Clustering Iteration | None |
| 45 | Polynomial Features | False |
| 46 | Polynomial Degree | None |
| 47 | Trignometry Features | False |
| 48 | Polynomial Threshold | None |
| 49 | Group Features | False |
| 50 | Feature Selection | False |
| 51 | Feature Selection Method | classic |
| 52 | Features Selection Threshold | None |
| 53 | Feature Interaction | False |
| 54 | Feature Ratio | False |
| 55 | Interaction Threshold | None |
| 56 | Transform Target | False |
| 57 | Transform Target Method | box-cox |
# Benchmark all PyCaret regressors, sorted by cross-validated RMSE;
# kNN is excluded and turbo=False also evaluates the slower models.
models = compare_models(sort ='RMSE',exclude =['knn'],turbo = False)
# pull() grabs the comparison grid of the last command as a DataFrame.
results = pull()
results.Model.tolist()
| Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) | |
|---|---|---|---|---|---|---|---|---|
| lr | Linear Regression | 0.0631 | 0.0073 | 0.0761 | 0.3176 | 0.0517 | 0.1714 | 0.6100 |
| lar | Least Angle Regression | 0.0631 | 0.0073 | 0.0761 | 0.3176 | 0.0517 | 0.1714 | 0.0080 |
| ard | Automatic Relevance Determination | 0.0656 | 0.0076 | 0.0781 | 0.1600 | 0.0526 | 0.1796 | 0.0080 |
| br | Bayesian Ridge | 0.0659 | 0.0083 | 0.0796 | 0.3050 | 0.0542 | 0.1789 | 0.0080 |
| huber | Huber Regressor | 0.0668 | 0.0079 | 0.0799 | 0.1509 | 0.0538 | 0.1832 | 0.0120 |
| tr | TheilSen Regressor | 0.0689 | 0.0087 | 0.0828 | 0.1639 | 0.0577 | 0.1536 | 0.2150 |
| omp | Orthogonal Matching Pursuit | 0.0778 | 0.0098 | 0.0881 | -0.2745 | 0.0593 | 0.2212 | 0.0070 |
| et | Extra Trees Regressor | 0.0868 | 0.0124 | 0.1029 | 0.1103 | 0.0711 | 0.1997 | 0.0850 |
| ransac | Random Sample Consensus | 0.0873 | 0.0117 | 0.1034 | -0.4605 | 0.0691 | 0.2044 | 0.0100 |
| par | Passive Aggressive Regressor | 0.1061 | 0.0185 | 0.1180 | -0.5381 | 0.0793 | 0.2794 | 0.0130 |
| xgboost | Extreme Gradient Boosting | 0.1063 | 0.0161 | 0.1190 | -1.7753 | 0.0827 | 0.2426 | 0.0700 |
| ada | AdaBoost Regressor | 0.0991 | 0.0163 | 0.1193 | -0.6517 | 0.0825 | 0.2704 | 0.0380 |
| rf | Random Forest Regressor | 0.0978 | 0.0168 | 0.1199 | -0.2297 | 0.0817 | 0.2531 | 0.1080 |
| kr | Kernel Ridge | 0.1023 | 0.0174 | 0.1221 | -0.3646 | 0.0825 | 0.2336 | 0.0100 |
| ridge | Ridge Regression | 0.1034 | 0.0184 | 0.1250 | 0.0558 | 0.0842 | 0.2576 | 0.0100 |
| gbr | Gradient Boosting Regressor | 0.1040 | 0.0188 | 0.1261 | -1.9228 | 0.0841 | 0.2322 | 0.0190 |
| catboost | CatBoost Regressor | 0.0980 | 0.0196 | 0.1287 | -0.4958 | 0.0868 | 0.1994 | 0.3070 |
| svm | Support Vector Regression | 0.1130 | 0.0241 | 0.1344 | -0.2499 | 0.0915 | 0.2950 | 0.0080 |
| mlp | MLP Regressor | 0.1160 | 0.0274 | 0.1480 | -0.2847 | 0.1006 | 0.3078 | 0.0240 |
| dt | Decision Tree Regressor | 0.1284 | 0.0280 | 0.1584 | -2.8811 | 0.1064 | 0.3215 | 0.0080 |
| llar | Lasso Least Angle Regression | 0.1612 | 0.0455 | 0.1966 | -0.8035 | 0.1324 | 0.5287 | 0.0080 |
| lasso | Lasso Regression | 0.1612 | 0.0455 | 0.1966 | -0.8035 | 0.1324 | 0.5287 | 0.0080 |
| en | Elastic Net | 0.1612 | 0.0455 | 0.1966 | -0.8035 | 0.1324 | 0.5287 | 0.0100 |
| lightgbm | Light Gradient Boosting Machine | 0.1636 | 0.0464 | 0.1991 | -0.8356 | 0.1350 | 0.5367 | 0.1530 |
['Linear Regression', 'Least Angle Regression', 'Automatic Relevance Determination', 'Bayesian Ridge', 'Huber Regressor', 'TheilSen Regressor', 'Orthogonal Matching Pursuit', 'Extra Trees Regressor', 'Random Sample Consensus', 'Passive Aggressive Regressor', 'Extreme Gradient Boosting', 'AdaBoost Regressor', 'Random Forest Regressor', 'Kernel Ridge', 'Ridge Regression', 'Gradient Boosting Regressor', 'CatBoost Regressor', 'Support Vector Regression', 'MLP Regressor', 'Decision Tree Regressor', 'Lasso Least Angle Regression', 'Lasso Regression', 'Elastic Net', 'Light Gradient Boosting Machine']
# Tune every model from the comparison grid and score it on the
# validation split; collect one out-of-sample RMSE per model id.
rmse = pd.DataFrame()
for currentmodelname in results.index:
    best_model = create_model(currentmodelname)
    tuned_model = tune_model(best_model)
    # Forecast the validation period with the tuned model.
    tuned_model_predictions = predict_model(tuned_model, data=df_val_omsætning_variables)
    actual = tuned_model_predictions['Omsætning (t)']
    predictions = tuned_model_predictions['Label']
    # Side-by-side frame of forecast vs. actuals (kept for inspection).
    forecast_actual = list(zip(predictions, actual))
    forecast_period = df_val_omsætning_variables.index
    forecast_result = pd.DataFrame(forecast_actual, columns=['Forecast', 'Faktiske'],
                                   index=forecast_period, dtype='float')
    rmse.loc[currentmodelname, 'RMSE'] = mean_squared_error(actual, predictions, squared=False)
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.2234 | 0.0545 | 0.2334 | -0.5862 | 0.1667 | 0.8114 |
| 1 | 0.1175 | 0.0164 | 0.1282 | -5.2716 | 0.0829 | 0.1955 |
| 2 | 0.0347 | 0.0018 | 0.0429 | -1.1431 | 0.0292 | 0.0777 |
| 3 | 0.1848 | 0.0579 | 0.2406 | -0.3490 | 0.1440 | 0.2745 |
| 4 | 0.1476 | 0.0671 | 0.2590 | -0.4484 | 0.2086 | 0.0515 |
| 5 | 0.1632 | 0.0271 | 0.1647 | 0.4036 | 0.1124 | 0.4193 |
| 6 | 0.0820 | 0.0082 | 0.0904 | -0.0026 | 0.0602 | 0.1640 |
| 7 | 0.1303 | 0.0503 | 0.2242 | 0.1099 | 0.1273 | 0.1526 |
| 8 | 0.2440 | 0.0716 | 0.2676 | 0.3381 | 0.1901 | 2.1944 |
| 9 | 0.1542 | 0.0259 | 0.1608 | 0.0785 | 0.1030 | 0.2865 |
| Mean | 0.1482 | 0.0381 | 0.1812 | -0.6871 | 0.1225 | 0.4627 |
| SD | 0.0591 | 0.0239 | 0.0725 | 1.5918 | 0.0539 | 0.6134 |
# Refit and tune the model with the lowest validation RMSE, then
# persist the full transformation pipeline + model to 'Omsætning.pkl'.
rmse = rmse.sort_values(by=['RMSE'])
best_model = create_model(rmse.index[0])
final_model_omsætning = tune_model(best_model)
save_model(final_model_omsætning, 'Omsætning')
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.1385 | 0.0285 | 0.1688 | 0.1705 | 0.1226 | 0.5236 |
| 1 | 0.0831 | 0.0070 | 0.0839 | -1.6893 | 0.0550 | 0.1447 |
| 2 | 0.0296 | 0.0016 | 0.0403 | -0.8858 | 0.0268 | 0.0615 |
| 3 | 0.0819 | 0.0104 | 0.1021 | 0.7570 | 0.0578 | 0.1187 |
| 4 | 0.0936 | 0.0137 | 0.1172 | 0.7035 | 0.1007 | 0.1142 |
| 5 | 0.0282 | 0.0009 | 0.0296 | 0.9808 | 0.0190 | 0.0628 |
| 6 | 0.0328 | 0.0013 | 0.0361 | 0.8396 | 0.0239 | 0.0651 |
| 7 | 0.0728 | 0.0064 | 0.0797 | 0.8874 | 0.0506 | 0.1363 |
| 8 | 0.0335 | 0.0014 | 0.0369 | 0.9874 | 0.0229 | 0.3254 |
| 9 | 0.1289 | 0.0254 | 0.1593 | 0.0962 | 0.1086 | 0.2558 |
| Mean | 0.0723 | 0.0097 | 0.0854 | 0.2847 | 0.0588 | 0.1808 |
| SD | 0.0388 | 0.0096 | 0.0486 | 0.8581 | 0.0367 | 0.1404 |
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None,
steps=[('dtypes',
DataTypes_Auto_infer(categorical_features=[],
display_types=True, features_todrop=[],
id_columns=[], ml_usecase='regression',
numerical_features=[],
target='Omsætning (t)',
time_features=[])),
('imputer',
Simple_Imputer(categorical_strategy='not_available',
fill_value_categorical=None,
fill_value_numerical=None,
numeric_str...
batch_size='auto', beta_1=0.9, beta_2=0.999,
early_stopping=False, epsilon=1e-08,
hidden_layer_sizes=[100, 100, 50],
learning_rate='adaptive',
learning_rate_init=0.001, max_fun=15000,
max_iter=500, momentum=0.9, n_iter_no_change=10,
nesterovs_momentum=True, power_t=0.5,
random_state=123, shuffle=True, solver='adam',
tol=0.0001, validation_fraction=0.1,
verbose=False, warm_start=False)]],
verbose=False),
'Omsætning.pkl')
# Identifier of the best model by validation RMSE (output below: 'mlp').
rmse.index[0]
'mlp'
# Same PyCaret experiment setup, now for the Vareforbrug model: full
# training frame for fitting, validation frame as the hold-out set.
setup_vareforbrug = setup(data = df_train_vareforbrug_variables,
train_size = 1.0,
test_data = df_val_vareforbrug_variables,
target = 'Vareforbrug (t)',
fold_strategy = 'timeseries',
normalize = False,
transform_target = False,
session_id=123)
| Description | Value | |
|---|---|---|
| 0 | session_id | 123 |
| 1 | Target | Vareforbrug (t) |
| 2 | Original Data | (46, 6) |
| 3 | Missing Values | False |
| 4 | Numeric Features | 5 |
| 5 | Categorical Features | 0 |
| 6 | Ordinal Features | False |
| 7 | High Cardinality Features | False |
| 8 | High Cardinality Method | None |
| 9 | Transformed Train Set | (46, 5) |
| 10 | Transformed Test Set | (8, 5) |
| 11 | Shuffle Train-Test | True |
| 12 | Stratify Train-Test | False |
| 13 | Fold Generator | TimeSeriesSplit |
| 14 | Fold Number | 10 |
| 15 | CPU Jobs | -1 |
| 16 | Use GPU | False |
| 17 | Log Experiment | False |
| 18 | Experiment Name | reg-default-name |
| 19 | USI | 0e32 |
| 20 | Imputation Type | simple |
| 21 | Iterative Imputation Iteration | None |
| 22 | Numeric Imputer | mean |
| 23 | Iterative Imputation Numeric Model | None |
| 24 | Categorical Imputer | constant |
| 25 | Iterative Imputation Categorical Model | None |
| 26 | Unknown Categoricals Handling | least_frequent |
| 27 | Normalize | False |
| 28 | Normalize Method | None |
| 29 | Transformation | False |
| 30 | Transformation Method | None |
| 31 | PCA | False |
| 32 | PCA Method | None |
| 33 | PCA Components | None |
| 34 | Ignore Low Variance | False |
| 35 | Combine Rare Levels | False |
| 36 | Rare Level Threshold | None |
| 37 | Numeric Binning | False |
| 38 | Remove Outliers | False |
| 39 | Outliers Threshold | None |
| 40 | Remove Multicollinearity | False |
| 41 | Multicollinearity Threshold | None |
| 42 | Remove Perfect Collinearity | True |
| 43 | Clustering | False |
| 44 | Clustering Iteration | None |
| 45 | Polynomial Features | False |
| 46 | Polynomial Degree | None |
| 47 | Trignometry Features | False |
| 48 | Polynomial Threshold | None |
| 49 | Group Features | False |
| 50 | Feature Selection | False |
| 51 | Feature Selection Method | classic |
| 52 | Features Selection Threshold | None |
| 53 | Feature Interaction | False |
| 54 | Feature Ratio | False |
| 55 | Interaction Threshold | None |
| 56 | Transform Target | False |
| 57 | Transform Target Method | box-cox |
# Cross-validated comparison of all PyCaret regressors (kNN excluded,
# turbo=False to include the slower estimators), ranked by RMSE.
# pull() captures the comparison grid (rendered below) as a DataFrame
# whose index holds the model identifiers used by create_model.
models = compare_models(sort ='RMSE',exclude =['knn'],turbo = False)
results = pull()
results.Model.tolist()
| Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) | |
|---|---|---|---|---|---|---|---|---|
| ard | Automatic Relevance Determination | 0.1390 | 0.0334 | 0.1698 | -0.1349 | 0.1129 | 0.2999 | 0.0080 |
| omp | Orthogonal Matching Pursuit | 0.1415 | 0.0342 | 0.1708 | -0.2486 | 0.1137 | 0.2990 | 0.0090 |
| ridge | Ridge Regression | 0.1429 | 0.0342 | 0.1731 | -0.2249 | 0.1151 | 0.3053 | 0.0100 |
| br | Bayesian Ridge | 0.1408 | 0.0354 | 0.1748 | -0.1110 | 0.1164 | 0.3079 | 0.0070 |
| lightgbm | Light Gradient Boosting Machine | 0.1436 | 0.0379 | 0.1779 | -0.1185 | 0.1188 | 0.3009 | 0.0130 |
| et | Extra Trees Regressor | 0.1443 | 0.0383 | 0.1782 | -0.2352 | 0.1168 | 0.3047 | 0.1010 |
| rf | Random Forest Regressor | 0.1407 | 0.0388 | 0.1818 | -0.4454 | 0.1187 | 0.2992 | 0.1110 |
| ada | AdaBoost Regressor | 0.1440 | 0.0389 | 0.1821 | -0.2674 | 0.1190 | 0.3057 | 0.0500 |
| gbr | Gradient Boosting Regressor | 0.1461 | 0.0400 | 0.1857 | -0.4978 | 0.1200 | 0.3003 | 0.0220 |
| lasso | Lasso Regression | 0.1555 | 0.0412 | 0.1859 | -0.1728 | 0.1236 | 0.3265 | 0.0110 |
| llar | Lasso Least Angle Regression | 0.1555 | 0.0412 | 0.1859 | -0.1728 | 0.1236 | 0.3265 | 0.0070 |
| en | Elastic Net | 0.1555 | 0.0412 | 0.1859 | -0.1728 | 0.1236 | 0.3265 | 0.0090 |
| catboost | CatBoost Regressor | 0.1477 | 0.0407 | 0.1870 | -0.3820 | 0.1218 | 0.3207 | 0.2940 |
| mlp | MLP Regressor | 0.1682 | 0.0416 | 0.1931 | -0.9683 | 0.1288 | 0.3579 | 0.0240 |
| xgboost | Extreme Gradient Boosting | 0.1648 | 0.0464 | 0.1932 | -0.5198 | 0.1255 | 0.3526 | 0.0710 |
| huber | Huber Regressor | 0.1626 | 0.0454 | 0.1986 | -2.6686 | 0.1287 | 0.3336 | 0.0120 |
| svm | Support Vector Regression | 0.1620 | 0.0467 | 0.2009 | -0.6226 | 0.1317 | 0.3418 | 0.0130 |
| dt | Decision Tree Regressor | 0.1747 | 0.0516 | 0.2109 | -2.1022 | 0.1381 | 0.3479 | 0.0080 |
| kr | Kernel Ridge | 0.1925 | 0.0535 | 0.2235 | -2.0386 | 0.1506 | 0.3877 | 0.0070 |
| par | Passive Aggressive Regressor | 0.2045 | 0.0690 | 0.2532 | -4.4142 | 0.1681 | 0.4297 | 0.0080 |
| lr | Linear Regression | 0.2083 | 0.1530 | 0.2721 | -7.7946 | 0.1531 | 0.4122 | 0.0110 |
| tr | TheilSen Regressor | 0.2167 | 0.1566 | 0.2790 | -8.1374 | 0.1563 | 0.4181 | 0.2660 |
| ransac | Random Sample Consensus | 0.2704 | 0.1858 | 0.3389 | -12.1230 | 0.1959 | 0.5481 | 0.0300 |
| lar | Least Angle Regression | 0.8685 | 12.1656 | 1.2814 | -668.8928 | 0.2743 | 1.5460 | 0.0080 |
['Automatic Relevance Determination', 'Orthogonal Matching Pursuit', 'Ridge Regression', 'Bayesian Ridge', 'Light Gradient Boosting Machine', 'Extra Trees Regressor', 'Random Forest Regressor', 'AdaBoost Regressor', 'Gradient Boosting Regressor', 'Lasso Regression', 'Lasso Least Angle Regression', 'Elastic Net', 'CatBoost Regressor', 'MLP Regressor', 'Extreme Gradient Boosting', 'Huber Regressor', 'Support Vector Regression', 'Decision Tree Regressor', 'Kernel Ridge', 'Passive Aggressive Regressor', 'Linear Regression', 'TheilSen Regressor', 'Random Sample Consensus', 'Least Angle Regression']
# Hold-out evaluation of every candidate from compare_models: rebuild and
# tune each model, predict the validation frame, and record its out-of-sample
# RMSE keyed by the PyCaret model id.
# Fixes: iterate results.index directly instead of range(0, len(results))
# (the id was recomputed three times per iteration), drop the dead
# forecast_actual/forecast_period intermediates, and restore the loop
# indentation lost in the notebook export.
# NOTE(review): forecast_result is overwritten every iteration; the last one
# is kept in case a later notebook cell reads it — confirm before removing.
rmse = pd.DataFrame()
for model_id in results.index:
    candidate = create_model(model_id)
    tuned_model = tune_model(candidate)
    tuned_model_predictions = predict_model(tuned_model, data=df_val_vareforbrug_variables)
    actual = tuned_model_predictions['Vareforbrug (t)']
    predictions = tuned_model_predictions['Label']
    forecast_result = pd.DataFrame(
        list(zip(predictions, actual)),
        columns=['Forecast', 'Faktiske'],
        index=df_val_vareforbrug_variables.index,
        dtype='float')
    rmse.loc[model_id, 'RMSE'] = mean_squared_error(actual, predictions, squared=False)
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.2530 | 0.0675 | 0.2598 | -2.7026 | 0.1783 | 0.5337 |
| 1 | 0.2249 | 0.0709 | 0.2663 | -16.6173 | 0.1891 | 0.3747 |
| 2 | 0.1259 | 0.0236 | 0.1538 | -2.4637 | 0.1014 | 0.2144 |
| 3 | 0.2358 | 0.0783 | 0.2798 | -0.6508 | 0.1712 | 0.3815 |
| 4 | 0.1244 | 0.0272 | 0.1649 | 0.0701 | 0.1207 | 0.4845 |
| 5 | 0.0771 | 0.0116 | 0.1075 | 0.6427 | 0.0758 | 0.2197 |
| 6 | 0.2046 | 0.0469 | 0.2165 | -4.5564 | 0.1397 | 0.4305 |
| 7 | 0.2783 | 0.1070 | 0.3271 | -0.9991 | 0.1950 | 0.4358 |
| 8 | 0.2187 | 0.0783 | 0.2799 | 0.3577 | 0.2128 | 0.1663 |
| 9 | 0.2079 | 0.0701 | 0.2647 | -0.1627 | 0.1820 | 0.7563 |
| Mean | 0.1951 | 0.0581 | 0.2320 | -2.7082 | 0.1566 | 0.3997 |
| SD | 0.0611 | 0.0284 | 0.0656 | 4.8880 | 0.0426 | 0.1663 |
# Pick the candidate with the lowest hold-out RMSE, rebuild and re-tune it
# (NOTE(review): presumably reproduces the tuned model from the loop given the
# fixed session_id — confirm), then persist the full preprocessing pipeline
# plus model to 'Vareforbrug.pkl'.
rmse = rmse.sort_values(by=['RMSE'])
best_model = create_model(rmse.index[0])
final_model_vareforbrug = tune_model(best_model)
save_model(final_model_vareforbrug, 'Vareforbrug')
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.1701 | 0.0486 | 0.2205 | -1.6674 | 0.1378 | 0.3700 |
| 1 | 0.1848 | 0.0627 | 0.2505 | -14.5877 | 0.1607 | 0.3323 |
| 2 | 0.1115 | 0.0233 | 0.1528 | -2.4192 | 0.0957 | 0.2278 |
| 3 | 0.1651 | 0.0347 | 0.1863 | 0.2676 | 0.1151 | 0.2969 |
| 4 | 0.2073 | 0.0802 | 0.2832 | -1.7436 | 0.1962 | 0.8666 |
| 5 | 0.3721 | 0.2322 | 0.4818 | -6.1808 | 0.2493 | 0.6674 |
| 6 | 0.1193 | 0.0200 | 0.1413 | -1.3657 | 0.0932 | 0.2585 |
| 7 | 0.1683 | 0.0535 | 0.2314 | -0.0005 | 0.1345 | 0.2428 |
| 8 | 0.2777 | 0.1044 | 0.3232 | 0.1438 | 0.2259 | 0.2959 |
| 9 | 0.1080 | 0.0281 | 0.1676 | 0.5340 | 0.1239 | 0.4840 |
| Mean | 0.1884 | 0.0688 | 0.2439 | -2.7019 | 0.1532 | 0.4042 |
| SD | 0.0778 | 0.0600 | 0.0965 | 4.3808 | 0.0512 | 0.1993 |
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None,
steps=[('dtypes',
DataTypes_Auto_infer(categorical_features=[],
display_types=True, features_todrop=[],
id_columns=[], ml_usecase='regression',
numerical_features=[],
target='Vareforbrug (t)',
time_features=[])),
('imputer',
Simple_Imputer(categorical_strategy='not_available',
fill_value_categorical=None,
fill_value_numerical=None,
numeric_s...
('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),
('dfs', 'passthrough'), ('pca', 'passthrough'),
['trained_model',
RANSACRegressor(base_estimator=None, is_data_valid=None,
is_model_valid=None, loss='squared_loss',
max_skips=13, max_trials=17, min_samples=0.2,
random_state=123, residual_threshold=None,
stop_n_inliers=10, stop_probability=0.75,
stop_score=inf)]],
verbose=False),
'Vareforbrug.pkl')
# Identifier of the best 'Vareforbrug' model (lowest hold-out RMSE) — echoed below.
rmse.index[0]
'ransac'
# PyCaret regression experiment for the 'Lønforbrug' (labour cost) target.
# Same configuration as the Vareforbrug experiment: full training frame for
# CV (train_size=1.0), validation frame as hold-out, time-series folds and a
# fixed seed for reproducibility.
setup_lønforbrug = setup(data = df_train_lønforbrug_variables,
train_size = 1.0,
test_data = df_val_lønforbrug_variables,
target = 'Lønforbrug (t)',
fold_strategy = 'timeseries',
normalize = False,
transform_target = False,
session_id=123)
| Description | Value | |
|---|---|---|
| 0 | session_id | 123 |
| 1 | Target | Lønforbrug (t) |
| 2 | Original Data | (46, 5) |
| 3 | Missing Values | False |
| 4 | Numeric Features | 4 |
| 5 | Categorical Features | 0 |
| 6 | Ordinal Features | False |
| 7 | High Cardinality Features | False |
| 8 | High Cardinality Method | None |
| 9 | Transformed Train Set | (46, 4) |
| 10 | Transformed Test Set | (8, 4) |
| 11 | Shuffle Train-Test | True |
| 12 | Stratify Train-Test | False |
| 13 | Fold Generator | TimeSeriesSplit |
| 14 | Fold Number | 10 |
| 15 | CPU Jobs | -1 |
| 16 | Use GPU | False |
| 17 | Log Experiment | False |
| 18 | Experiment Name | reg-default-name |
| 19 | USI | ac31 |
| 20 | Imputation Type | simple |
| 21 | Iterative Imputation Iteration | None |
| 22 | Numeric Imputer | mean |
| 23 | Iterative Imputation Numeric Model | None |
| 24 | Categorical Imputer | constant |
| 25 | Iterative Imputation Categorical Model | None |
| 26 | Unknown Categoricals Handling | least_frequent |
| 27 | Normalize | False |
| 28 | Normalize Method | None |
| 29 | Transformation | False |
| 30 | Transformation Method | None |
| 31 | PCA | False |
| 32 | PCA Method | None |
| 33 | PCA Components | None |
| 34 | Ignore Low Variance | False |
| 35 | Combine Rare Levels | False |
| 36 | Rare Level Threshold | None |
| 37 | Numeric Binning | False |
| 38 | Remove Outliers | False |
| 39 | Outliers Threshold | None |
| 40 | Remove Multicollinearity | False |
| 41 | Multicollinearity Threshold | None |
| 42 | Remove Perfect Collinearity | True |
| 43 | Clustering | False |
| 44 | Clustering Iteration | None |
| 45 | Polynomial Features | False |
| 46 | Polynomial Degree | None |
| 47 | Trignometry Features | False |
| 48 | Polynomial Threshold | None |
| 49 | Group Features | False |
| 50 | Feature Selection | False |
| 51 | Feature Selection Method | classic |
| 52 | Features Selection Threshold | None |
| 53 | Feature Interaction | False |
| 54 | Feature Ratio | False |
| 55 | Interaction Threshold | None |
| 56 | Transform Target | False |
| 57 | Transform Target Method | box-cox |
# Cross-validated comparison of all PyCaret regressors (kNN excluded,
# turbo=False to include the slower estimators), ranked by RMSE.
# pull() captures the comparison grid (rendered below) as a DataFrame
# whose index holds the model identifiers used by create_model.
models = compare_models(sort ='RMSE',exclude =['knn'],turbo = False)
results = pull()
results.Model.tolist()
| Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) | |
|---|---|---|---|---|---|---|---|---|
| tr | TheilSen Regressor | 0.1514 | 0.0371 | 0.1730 | 0.0017 | 0.1177 | 0.3877 | 0.2330 |
| ridge | Ridge Regression | 0.1554 | 0.0350 | 0.1780 | -0.0870 | 0.1225 | 0.3989 | 0.0120 |
| lr | Linear Regression | 0.1579 | 0.0377 | 0.1783 | -0.1948 | 0.1215 | 0.3963 | 0.0090 |
| lar | Least Angle Regression | 0.1579 | 0.0377 | 0.1783 | -0.1948 | 0.1215 | 0.3963 | 0.0120 |
| rf | Random Forest Regressor | 0.1543 | 0.0354 | 0.1796 | -0.1719 | 0.1236 | 0.4021 | 0.1250 |
| br | Bayesian Ridge | 0.1562 | 0.0371 | 0.1798 | -0.1845 | 0.1231 | 0.3991 | 0.0110 |
| ard | Automatic Relevance Determination | 0.1616 | 0.0377 | 0.1816 | -0.2787 | 0.1240 | 0.4083 | 0.0130 |
| huber | Huber Regressor | 0.1611 | 0.0405 | 0.1828 | -0.2853 | 0.1237 | 0.4017 | 0.0110 |
| et | Extra Trees Regressor | 0.1585 | 0.0374 | 0.1832 | -0.3608 | 0.1252 | 0.4021 | 0.1180 |
| ransac | Random Sample Consensus | 0.1570 | 0.0385 | 0.1841 | -0.0703 | 0.1232 | 0.3799 | 0.0160 |
| lightgbm | Light Gradient Boosting Machine | 0.1573 | 0.0379 | 0.1865 | -0.0655 | 0.1274 | 0.3889 | 0.0120 |
| omp | Orthogonal Matching Pursuit | 0.1680 | 0.0406 | 0.1884 | -0.3433 | 0.1282 | 0.3985 | 0.0110 |
| catboost | CatBoost Regressor | 0.1536 | 0.0406 | 0.1890 | -0.3595 | 0.1288 | 0.3969 | 0.3720 |
| en | Elastic Net | 0.1614 | 0.0394 | 0.1909 | -0.1059 | 0.1305 | 0.3998 | 0.0110 |
| llar | Lasso Least Angle Regression | 0.1614 | 0.0394 | 0.1909 | -0.1059 | 0.1305 | 0.3998 | 0.0110 |
| lasso | Lasso Regression | 0.1614 | 0.0394 | 0.1909 | -0.1059 | 0.1305 | 0.3998 | 0.0110 |
| gbr | Gradient Boosting Regressor | 0.1680 | 0.0446 | 0.1931 | -0.4270 | 0.1303 | 0.4133 | 0.0210 |
| svm | Support Vector Regression | 0.1780 | 0.0458 | 0.2057 | -0.4611 | 0.1397 | 0.4488 | 0.0150 |
| ada | AdaBoost Regressor | 0.1722 | 0.0480 | 0.2066 | -0.9043 | 0.1400 | 0.4687 | 0.0410 |
| xgboost | Extreme Gradient Boosting | 0.1824 | 0.0510 | 0.2109 | -0.7021 | 0.1431 | 0.4343 | 0.0940 |
| kr | Kernel Ridge | 0.1855 | 0.0487 | 0.2123 | -0.7150 | 0.1434 | 0.4040 | 0.0090 |
| mlp | MLP Regressor | 0.1772 | 0.0500 | 0.2152 | -0.6603 | 0.1476 | 0.4713 | 0.0190 |
| dt | Decision Tree Regressor | 0.2018 | 0.0686 | 0.2477 | -1.3639 | 0.1623 | 0.5146 | 0.0080 |
| par | Passive Aggressive Regressor | 0.2630 | 0.1055 | 0.2931 | -1.6873 | 0.1862 | 0.5928 | 0.0080 |
['TheilSen Regressor', 'Ridge Regression', 'Linear Regression', 'Least Angle Regression', 'Random Forest Regressor', 'Bayesian Ridge', 'Automatic Relevance Determination', 'Huber Regressor', 'Extra Trees Regressor', 'Random Sample Consensus', 'Light Gradient Boosting Machine', 'Orthogonal Matching Pursuit', 'CatBoost Regressor', 'Elastic Net', 'Lasso Least Angle Regression', 'Lasso Regression', 'Gradient Boosting Regressor', 'Support Vector Regression', 'AdaBoost Regressor', 'Extreme Gradient Boosting', 'Kernel Ridge', 'MLP Regressor', 'Decision Tree Regressor', 'Passive Aggressive Regressor']
# Hold-out evaluation of every candidate from compare_models: rebuild and
# tune each model, predict the validation frame, and record its out-of-sample
# RMSE keyed by the PyCaret model id.
# Fixes: iterate results.index directly instead of range(0, len(results))
# (the id was recomputed three times per iteration), drop the dead
# forecast_actual/forecast_period intermediates, and restore the loop
# indentation lost in the notebook export.
# NOTE(review): forecast_result is overwritten every iteration; the last one
# is kept in case a later notebook cell reads it — confirm before removing.
rmse = pd.DataFrame()
for model_id in results.index:
    candidate = create_model(model_id)
    tuned_model = tune_model(candidate)
    tuned_model_predictions = predict_model(tuned_model, data=df_val_lønforbrug_variables)
    actual = tuned_model_predictions['Lønforbrug (t)']
    predictions = tuned_model_predictions['Label']
    forecast_result = pd.DataFrame(
        list(zip(predictions, actual)),
        columns=['Forecast', 'Faktiske'],
        index=df_val_lønforbrug_variables.index,
        dtype='float')
    rmse.loc[model_id, 'RMSE'] = mean_squared_error(actual, predictions, squared=False)
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.1708 | 0.0374 | 0.1934 | -0.2829 | 0.1279 | 0.3904 |
| 1 | 0.2788 | 0.0941 | 0.3067 | -0.3187 | 0.1974 | 0.6588 |
| 2 | 0.1362 | 0.0218 | 0.1478 | -1.8774 | 0.0990 | 0.3058 |
| 3 | 0.1863 | 0.0509 | 0.2257 | 0.1190 | 0.1648 | 1.0033 |
| 4 | 0.2151 | 0.0792 | 0.2814 | -0.1736 | 0.2201 | 0.1889 |
| 5 | 0.1169 | 0.0187 | 0.1369 | 0.5177 | 0.0837 | 0.2066 |
| 6 | 0.1404 | 0.0249 | 0.1578 | -0.0545 | 0.1034 | 0.2908 |
| 7 | 0.1181 | 0.0158 | 0.1255 | 0.2235 | 0.0858 | 0.2691 |
| 8 | 0.1349 | 0.0220 | 0.1482 | -0.3465 | 0.0960 | 0.2263 |
| 9 | 0.1824 | 0.0354 | 0.1881 | 0.0408 | 0.1195 | 0.3720 |
| Mean | 0.1680 | 0.0400 | 0.1911 | -0.2152 | 0.1298 | 0.3912 |
| SD | 0.0480 | 0.0256 | 0.0590 | 0.6107 | 0.0457 | 0.2411 |
# Pick the candidate with the lowest hold-out RMSE, rebuild and re-tune it
# (NOTE(review): presumably reproduces the tuned model from the loop given the
# fixed session_id — confirm), then persist the full preprocessing pipeline
# plus model to 'Lønforbrug.pkl'.
rmse = rmse.sort_values(by=['RMSE'])
best_model = create_model(rmse.index[0])
final_model_lønforbrug = tune_model(best_model)
save_model(final_model_lønforbrug, 'Lønforbrug')
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.1490 | 0.0313 | 0.1768 | -0.0718 | 0.1212 | 0.4167 |
| 1 | 0.2463 | 0.0767 | 0.2769 | -0.0744 | 0.1816 | 0.6307 |
| 2 | 0.0929 | 0.0137 | 0.1172 | -0.8086 | 0.0792 | 0.2255 |
| 3 | 0.1628 | 0.0377 | 0.1942 | 0.3477 | 0.1442 | 0.8934 |
| 4 | 0.1921 | 0.0813 | 0.2851 | -0.2047 | 0.2242 | 0.1224 |
| 5 | 0.1402 | 0.0237 | 0.1539 | 0.3904 | 0.1008 | 0.3096 |
| 6 | 0.1234 | 0.0225 | 0.1499 | 0.0483 | 0.0983 | 0.2614 |
| 7 | 0.1346 | 0.0202 | 0.1421 | 0.0045 | 0.0966 | 0.3071 |
| 8 | 0.1018 | 0.0158 | 0.1257 | 0.0307 | 0.0823 | 0.2009 |
| 9 | 0.1192 | 0.0229 | 0.1512 | 0.3802 | 0.0979 | 0.2598 |
| Mean | 0.1462 | 0.0346 | 0.1773 | 0.0042 | 0.1226 | 0.3627 |
| SD | 0.0432 | 0.0232 | 0.0560 | 0.3358 | 0.0449 | 0.2204 |
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None,
steps=[('dtypes',
DataTypes_Auto_infer(categorical_features=[],
display_types=True, features_todrop=[],
id_columns=[], ml_usecase='regression',
numerical_features=[],
target='Lønforbrug (t)',
time_features=[])),
('imputer',
Simple_Imputer(categorical_strategy='not_available',
fill_value_categorical=None,
fill_value_numerical=None,
numeric_st...
('cluster_all', 'passthrough'),
('dummy', Dummify(target='Lønforbrug (t)')),
('fix_perfect', Remove_100(target='Lønforbrug (t)')),
('clean_names', Clean_Colum_Names()),
('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),
('dfs', 'passthrough'), ('pca', 'passthrough'),
['trained_model',
<catboost.core.CatBoostRegressor object at 0x0000024D28320DC0>]],
verbose=False),
'Lønforbrug.pkl')
# Identifier of the best 'Lønforbrug' model (lowest hold-out RMSE) — echoed below.
rmse.index[0]
'catboost'
# PyCaret regression experiment for the 'Faktureringsgrad' (billing rate)
# target. Unlike the other experiments, the month dummy columns are declared
# categorical explicitly so PyCaret does not treat the 0/1 indicators as
# numeric features. Otherwise the configuration matches the others: full
# training frame for CV, validation frame as hold-out, time-series folds,
# fixed seed.
setup_faktureringsgrad = setup(data = df_train_faktureringsgrad_variables,
train_size = 1.0,
test_data = df_val_faktureringsgrad_variables,
target = 'Faktureringsgrad (t)',
fold_strategy = 'timeseries',
categorical_features = ['Februar (t)','Marts (t)','April (t)','Maj (t)','Juni (t)','Juli (t)',
'August (t)','September (t)','Oktober (t)','November (t)','December (t)'],
normalize = False,
transform_target = False,
session_id=123)
| Description | Value | |
|---|---|---|
| 0 | session_id | 123 |
| 1 | Target | Faktureringsgrad (t) |
| 2 | Original Data | (46, 14) |
| 3 | Missing Values | False |
| 4 | Numeric Features | 2 |
| 5 | Categorical Features | 11 |
| 6 | Ordinal Features | False |
| 7 | High Cardinality Features | False |
| 8 | High Cardinality Method | None |
| 9 | Transformed Train Set | (46, 13) |
| 10 | Transformed Test Set | (8, 13) |
| 11 | Shuffle Train-Test | True |
| 12 | Stratify Train-Test | False |
| 13 | Fold Generator | TimeSeriesSplit |
| 14 | Fold Number | 10 |
| 15 | CPU Jobs | -1 |
| 16 | Use GPU | False |
| 17 | Log Experiment | False |
| 18 | Experiment Name | reg-default-name |
| 19 | USI | 9f0f |
| 20 | Imputation Type | simple |
| 21 | Iterative Imputation Iteration | None |
| 22 | Numeric Imputer | mean |
| 23 | Iterative Imputation Numeric Model | None |
| 24 | Categorical Imputer | constant |
| 25 | Iterative Imputation Categorical Model | None |
| 26 | Unknown Categoricals Handling | least_frequent |
| 27 | Normalize | False |
| 28 | Normalize Method | None |
| 29 | Transformation | False |
| 30 | Transformation Method | None |
| 31 | PCA | False |
| 32 | PCA Method | None |
| 33 | PCA Components | None |
| 34 | Ignore Low Variance | False |
| 35 | Combine Rare Levels | False |
| 36 | Rare Level Threshold | None |
| 37 | Numeric Binning | False |
| 38 | Remove Outliers | False |
| 39 | Outliers Threshold | None |
| 40 | Remove Multicollinearity | False |
| 41 | Multicollinearity Threshold | None |
| 42 | Remove Perfect Collinearity | True |
| 43 | Clustering | False |
| 44 | Clustering Iteration | None |
| 45 | Polynomial Features | False |
| 46 | Polynomial Degree | None |
| 47 | Trignometry Features | False |
| 48 | Polynomial Threshold | None |
| 49 | Group Features | False |
| 50 | Feature Selection | False |
| 51 | Feature Selection Method | classic |
| 52 | Features Selection Threshold | None |
| 53 | Feature Interaction | False |
| 54 | Feature Ratio | False |
| 55 | Interaction Threshold | None |
| 56 | Transform Target | False |
| 57 | Transform Target Method | box-cox |
# Cross-validated comparison of all PyCaret regressors (kNN excluded,
# turbo=False to include the slower estimators), ranked by RMSE.
# pull() captures the comparison grid (rendered below) as a DataFrame
# whose index holds the model identifiers used by create_model.
models = compare_models(sort ='RMSE',exclude =['knn'],turbo = False)
results = pull()
results.Model.tolist()
| Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) | |
|---|---|---|---|---|---|---|---|---|
| ransac | Random Sample Consensus | 0.1670 | 0.0504 | 0.1908 | -2.7527 | 0.1178 | 0.2515 | 0.0270 |
| kr | Kernel Ridge | 0.1654 | 0.0431 | 0.1942 | -1.5714 | 0.1230 | 0.2683 | 0.0080 |
| ridge | Ridge Regression | 0.1750 | 0.0470 | 0.2024 | -2.1946 | 0.1282 | 0.2896 | 0.0110 |
| br | Bayesian Ridge | 0.1761 | 0.0461 | 0.2031 | -2.0792 | 0.1301 | 0.2944 | 0.0060 |
| par | Passive Aggressive Regressor | 0.1769 | 0.0472 | 0.2054 | -1.7894 | 0.1308 | 0.3014 | 0.0120 |
| ada | AdaBoost Regressor | 0.1816 | 0.0472 | 0.2072 | -2.1887 | 0.1295 | 0.2919 | 0.0370 |
| svm | Support Vector Regression | 0.1790 | 0.0496 | 0.2079 | -2.3125 | 0.1333 | 0.2975 | 0.0090 |
| tr | TheilSen Regressor | 0.1769 | 0.0498 | 0.2096 | -2.3480 | 0.1337 | 0.2955 | 0.3390 |
| ard | Automatic Relevance Determination | 0.1828 | 0.0492 | 0.2108 | -2.4848 | 0.1363 | 0.3138 | 0.0080 |
| catboost | CatBoost Regressor | 0.1844 | 0.0517 | 0.2116 | -2.9139 | 0.1329 | 0.3083 | 0.3080 |
| mlp | MLP Regressor | 0.1802 | 0.0527 | 0.2117 | -1.9622 | 0.1326 | 0.3257 | 0.0180 |
| rf | Random Forest Regressor | 0.1884 | 0.0519 | 0.2165 | -2.6067 | 0.1359 | 0.3152 | 0.1130 |
| lasso | Lasso Regression | 0.1909 | 0.0563 | 0.2209 | -3.0924 | 0.1381 | 0.3121 | 0.0140 |
| en | Elastic Net | 0.1909 | 0.0563 | 0.2209 | -3.0924 | 0.1381 | 0.3121 | 0.0080 |
| llar | Lasso Least Angle Regression | 0.1909 | 0.0563 | 0.2209 | -3.0924 | 0.1381 | 0.3121 | 0.0120 |
| huber | Huber Regressor | 0.1943 | 0.0557 | 0.2255 | -2.3727 | 0.1474 | 0.3312 | 0.0160 |
| lightgbm | Light Gradient Boosting Machine | 0.1945 | 0.0578 | 0.2258 | -3.1851 | 0.1411 | 0.3172 | 0.0150 |
| et | Extra Trees Regressor | 0.2056 | 0.0601 | 0.2349 | -3.0467 | 0.1520 | 0.3422 | 0.1010 |
| xgboost | Extreme Gradient Boosting | 0.2081 | 0.0664 | 0.2383 | -4.1590 | 0.1515 | 0.3303 | 0.0760 |
| lr | Linear Regression | 0.2091 | 0.0662 | 0.2407 | -3.0597 | 0.1563 | 0.3515 | 0.0100 |
| gbr | Gradient Boosting Regressor | 0.2121 | 0.0650 | 0.2410 | -4.0341 | 0.1518 | 0.3528 | 0.0210 |
| omp | Orthogonal Matching Pursuit | 0.2128 | 0.0673 | 0.2422 | -3.4288 | 0.1557 | 0.3591 | 0.0080 |
| dt | Decision Tree Regressor | 0.2284 | 0.0788 | 0.2593 | -5.3512 | 0.1635 | 0.3669 | 0.0080 |
| lar | Least Angle Regression | 0.6394 | 1.1481 | 0.7346 | -96.3225 | 0.2841 | 0.9973 | 0.0100 |
['Random Sample Consensus', 'Kernel Ridge', 'Ridge Regression', 'Bayesian Ridge', 'Passive Aggressive Regressor', 'AdaBoost Regressor', 'Support Vector Regression', 'TheilSen Regressor', 'Automatic Relevance Determination', 'CatBoost Regressor', 'MLP Regressor', 'Random Forest Regressor', 'Lasso Regression', 'Elastic Net', 'Lasso Least Angle Regression', 'Huber Regressor', 'Light Gradient Boosting Machine', 'Extra Trees Regressor', 'Extreme Gradient Boosting', 'Linear Regression', 'Gradient Boosting Regressor', 'Orthogonal Matching Pursuit', 'Decision Tree Regressor', 'Least Angle Regression']
# Hold-out evaluation of every candidate from compare_models: rebuild and
# tune each model, predict the validation frame, and record its out-of-sample
# RMSE keyed by the PyCaret model id.
# Fixes: iterate results.index directly instead of range(0, len(results))
# (the id was recomputed three times per iteration), drop the dead
# forecast_actual/forecast_period intermediates, and restore the loop
# indentation lost in the notebook export.
# NOTE(review): forecast_result is overwritten every iteration; the last one
# is kept in case a later notebook cell reads it — confirm before removing.
rmse = pd.DataFrame()
for model_id in results.index:
    candidate = create_model(model_id)
    tuned_model = tune_model(candidate)
    tuned_model_predictions = predict_model(tuned_model, data=df_val_faktureringsgrad_variables)
    actual = tuned_model_predictions['Faktureringsgrad (t)']
    predictions = tuned_model_predictions['Label']
    forecast_result = pd.DataFrame(
        list(zip(predictions, actual)),
        columns=['Forecast', 'Faktiske'],
        index=df_val_faktureringsgrad_variables.index,
        dtype='float')
    rmse.loc[model_id, 'RMSE'] = mean_squared_error(actual, predictions, squared=False)
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.3712 | 0.1480 | 0.3847 | -14.2704 | 0.2660 | 1.1661 |
| 1 | 0.3175 | 0.1590 | 0.3987 | -0.4274 | 0.2912 | 0.3704 |
| 2 | 0.1939 | 0.0435 | 0.2086 | -4.2951 | 0.1184 | 0.2207 |
| 3 | 0.3057 | 0.1329 | 0.3646 | -7.1399 | 0.2715 | 0.5266 |
| 4 | 0.2665 | 0.1569 | 0.3962 | -6.5121 | 0.1938 | 0.3585 |
| 5 | 0.2096 | 0.0463 | 0.2151 | -2.1093 | 0.1249 | 0.3037 |
| 6 | 0.4370 | 0.2064 | 0.4543 | -5.4568 | 0.2762 | 0.7936 |
| 7 | 0.1113 | 0.0139 | 0.1178 | 0.1344 | 0.0756 | 0.1943 |
| 8 | 0.1872 | 0.0539 | 0.2321 | -5.2235 | 0.1289 | 0.2167 |
| 9 | 0.4440 | 0.3203 | 0.5659 | -19.0275 | 0.2896 | 0.6928 |
| Mean | 0.2844 | 0.1281 | 0.3338 | -6.4328 | 0.2036 | 0.4843 |
| SD | 0.1055 | 0.0881 | 0.1291 | 5.7047 | 0.0802 | 0.2987 |
# Pick the candidate with the lowest hold-out RMSE, rebuild and re-tune it
# (NOTE(review): presumably reproduces the tuned model from the loop given the
# fixed session_id — confirm), then persist the full preprocessing pipeline
# plus model to 'Faktureringsgrad.pkl'.
rmse = rmse.sort_values(by=['RMSE'])
best_model = create_model(rmse.index[0])
final_model_faktureringsgrad = tune_model(best_model)
save_model(final_model_faktureringsgrad, 'Faktureringsgrad')
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.3072 | 0.1041 | 0.3226 | -9.7355 | 0.2163 | 0.9790 |
| 1 | 0.2674 | 0.1114 | 0.3338 | -0.0005 | 0.2380 | 0.2668 |
| 2 | 0.1274 | 0.0245 | 0.1564 | -1.9747 | 0.0839 | 0.1338 |
| 3 | 0.2578 | 0.0810 | 0.2846 | -3.9599 | 0.1823 | 0.6159 |
| 4 | 0.1268 | 0.0217 | 0.1473 | -0.0391 | 0.0813 | 0.1591 |
| 5 | 0.1158 | 0.0163 | 0.1278 | -0.0987 | 0.0737 | 0.1657 |
| 6 | 0.1815 | 0.0373 | 0.1931 | -0.1663 | 0.1179 | 0.3192 |
| 7 | 0.1879 | 0.0483 | 0.2198 | -2.0126 | 0.1368 | 0.3214 |
| 8 | 0.0925 | 0.0164 | 0.1280 | -0.8931 | 0.0706 | 0.1041 |
| 9 | 0.1470 | 0.0223 | 0.1494 | -0.3956 | 0.0896 | 0.2402 |
| Mean | 0.1811 | 0.0483 | 0.2063 | -1.9276 | 0.1290 | 0.3305 |
| SD | 0.0696 | 0.0350 | 0.0760 | 2.8679 | 0.0590 | 0.2571 |
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None,
steps=[('dtypes',
DataTypes_Auto_infer(categorical_features=['Februar (t)',
'Marts (t)',
'April (t)',
'Maj (t)',
'Juni (t)',
'Juli (t)',
'August (t)',
'September (t)',
'Oktober (t)',
'November (t)',
'December (t)'],
display_types=True, features_todrop=[],
id_columns=[], ml_usecase='regression',
numerical_features=[],
target='Faktureringsgrad (t)',
time_feat...
('dfs', 'passthrough'), ('pca', 'passthrough'),
['trained_model',
DecisionTreeRegressor(ccp_alpha=0.0, criterion='mae',
max_depth=4, max_features='log2',
max_leaf_nodes=None,
min_impurity_decrease=0.0002,
min_impurity_split=None,
min_samples_leaf=5, min_samples_split=5,
min_weight_fraction_leaf=0.0,
presort='deprecated', random_state=123,
splitter='best')]],
verbose=False),
'Faktureringsgrad.pkl')
# Identifier of the best 'Faktureringsgrad' model (lowest hold-out RMSE) — echoed below.
rmse.index[0]
'dt'
# PyCaret regression experiment for the 'Fakturerbar tid' (billable hours)
# target. Same configuration as the other experiments: full training frame
# for CV (train_size=1.0), validation frame as hold-out, time-series folds
# and a fixed seed for reproducibility.
setup_fakturerbartid = setup(data = df_train_fakturerbartid_variables,
train_size = 1.0,
test_data = df_val_fakturerbartid_variables,
target = 'Fakturerbar tid (t)',
fold_strategy = 'timeseries',
normalize = False,
transform_target = False,
session_id=123)
| Description | Value | |
|---|---|---|
| 0 | session_id | 123 |
| 1 | Target | Fakturerbar tid (t) |
| 2 | Original Data | (46, 4) |
| 3 | Missing Values | False |
| 4 | Numeric Features | 3 |
| 5 | Categorical Features | 0 |
| 6 | Ordinal Features | False |
| 7 | High Cardinality Features | False |
| 8 | High Cardinality Method | None |
| 9 | Transformed Train Set | (46, 3) |
| 10 | Transformed Test Set | (8, 3) |
| 11 | Shuffle Train-Test | True |
| 12 | Stratify Train-Test | False |
| 13 | Fold Generator | TimeSeriesSplit |
| 14 | Fold Number | 10 |
| 15 | CPU Jobs | -1 |
| 16 | Use GPU | False |
| 17 | Log Experiment | False |
| 18 | Experiment Name | reg-default-name |
| 19 | USI | e989 |
| 20 | Imputation Type | simple |
| 21 | Iterative Imputation Iteration | None |
| 22 | Numeric Imputer | mean |
| 23 | Iterative Imputation Numeric Model | None |
| 24 | Categorical Imputer | constant |
| 25 | Iterative Imputation Categorical Model | None |
| 26 | Unknown Categoricals Handling | least_frequent |
| 27 | Normalize | False |
| 28 | Normalize Method | None |
| 29 | Transformation | False |
| 30 | Transformation Method | None |
| 31 | PCA | False |
| 32 | PCA Method | None |
| 33 | PCA Components | None |
| 34 | Ignore Low Variance | False |
| 35 | Combine Rare Levels | False |
| 36 | Rare Level Threshold | None |
| 37 | Numeric Binning | False |
| 38 | Remove Outliers | False |
| 39 | Outliers Threshold | None |
| 40 | Remove Multicollinearity | False |
| 41 | Multicollinearity Threshold | None |
| 42 | Remove Perfect Collinearity | True |
| 43 | Clustering | False |
| 44 | Clustering Iteration | None |
| 45 | Polynomial Features | False |
| 46 | Polynomial Degree | None |
| 47 | Trignometry Features | False |
| 48 | Polynomial Threshold | None |
| 49 | Group Features | False |
| 50 | Feature Selection | False |
| 51 | Feature Selection Method | classic |
| 52 | Features Selection Threshold | None |
| 53 | Feature Interaction | False |
| 54 | Feature Ratio | False |
| 55 | Interaction Threshold | None |
| 56 | Transform Target | False |
| 57 | Transform Target Method | box-cox |
# Cross-validated comparison of all PyCaret regressors (kNN excluded,
# turbo=False to include the slower estimators), ranked by RMSE.
# pull() captures the comparison grid (rendered below) as a DataFrame
# whose index holds the model identifiers used by create_model.
models = compare_models(sort ='RMSE',exclude =['knn'],turbo = False)
results = pull()
results.Model.tolist()
| Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) | |
|---|---|---|---|---|---|---|---|---|
| ransac | Random Sample Consensus | 0.1383 | 0.0323 | 0.1632 | -0.4045 | 0.1090 | 0.3887 | 0.0190 |
| huber | Huber Regressor | 0.1514 | 0.0385 | 0.1834 | -0.4718 | 0.1223 | 0.4410 | 0.0130 |
| tr | TheilSen Regressor | 0.1496 | 0.0403 | 0.1851 | -0.3832 | 0.1234 | 0.4410 | 0.2200 |
| kr | Kernel Ridge | 0.1565 | 0.0431 | 0.1851 | -0.2930 | 0.1250 | 0.4326 | 0.0100 |
| ridge | Ridge Regression | 0.1475 | 0.0382 | 0.1864 | -0.5492 | 0.1248 | 0.4632 | 0.0110 |
| catboost | CatBoost Regressor | 0.1567 | 0.0388 | 0.1877 | -0.6058 | 0.1270 | 0.4881 | 0.2470 |
| en | Elastic Net | 0.1461 | 0.0393 | 0.1900 | -0.5564 | 0.1280 | 0.4534 | 0.0110 |
| llar | Lasso Least Angle Regression | 0.1461 | 0.0393 | 0.1900 | -0.5564 | 0.1280 | 0.4534 | 0.0100 |
| lasso | Lasso Regression | 0.1461 | 0.0393 | 0.1900 | -0.5564 | 0.1280 | 0.4534 | 0.0120 |
| lightgbm | Light Gradient Boosting Machine | 0.1474 | 0.0398 | 0.1908 | -0.5635 | 0.1288 | 0.4615 | 0.0160 |
| br | Bayesian Ridge | 0.1542 | 0.0408 | 0.1914 | -0.6265 | 0.1277 | 0.4846 | 0.0110 |
| ada | AdaBoost Regressor | 0.1639 | 0.0431 | 0.1924 | -0.5704 | 0.1290 | 0.5081 | 0.0420 |
| rf | Random Forest Regressor | 0.1599 | 0.0407 | 0.1930 | -0.6407 | 0.1299 | 0.4900 | 0.1110 |
| mlp | MLP Regressor | 0.1663 | 0.0440 | 0.1937 | -0.5806 | 0.1307 | 0.5277 | 0.0260 |
| ard | Automatic Relevance Determination | 0.1623 | 0.0442 | 0.1978 | -0.6874 | 0.1318 | 0.5055 | 0.0110 |
| gbr | Gradient Boosting Regressor | 0.1741 | 0.0458 | 0.1987 | -0.6692 | 0.1351 | 0.5326 | 0.0300 |
| lr | Linear Regression | 0.1637 | 0.0448 | 0.1988 | -0.8949 | 0.1320 | 0.5011 | 0.0100 |
| lar | Least Angle Regression | 0.1637 | 0.0448 | 0.1988 | -0.8949 | 0.1320 | 0.5011 | 0.0120 |
| et | Extra Trees Regressor | 0.1740 | 0.0436 | 0.2001 | -0.8346 | 0.1348 | 0.5135 | 0.1110 |
| svm | Support Vector Regression | 0.1710 | 0.0479 | 0.2044 | -0.7630 | 0.1386 | 0.5607 | 0.0110 |
| omp | Orthogonal Matching Pursuit | 0.1721 | 0.0485 | 0.2080 | -0.8572 | 0.1387 | 0.5301 | 0.0100 |
| xgboost | Extreme Gradient Boosting | 0.1836 | 0.0507 | 0.2158 | -1.1845 | 0.1460 | 0.5611 | 0.0740 |
| dt | Decision Tree Regressor | 0.1917 | 0.0588 | 0.2329 | -1.8433 | 0.1589 | 0.6147 | 0.0120 |
| par | Passive Aggressive Regressor | 0.4215 | 0.2454 | 0.4659 | -15.1002 | 0.2395 | 1.1670 | 0.0120 |
['Random Sample Consensus', 'Huber Regressor', 'TheilSen Regressor', 'Kernel Ridge', 'Ridge Regression', 'CatBoost Regressor', 'Elastic Net', 'Lasso Least Angle Regression', 'Lasso Regression', 'Light Gradient Boosting Machine', 'Bayesian Ridge', 'AdaBoost Regressor', 'Random Forest Regressor', 'MLP Regressor', 'Automatic Relevance Determination', 'Gradient Boosting Regressor', 'Linear Regression', 'Least Angle Regression', 'Extra Trees Regressor', 'Support Vector Regression', 'Orthogonal Matching Pursuit', 'Extreme Gradient Boosting', 'Decision Tree Regressor', 'Passive Aggressive Regressor']
# Hold-out evaluation of every candidate from compare_models: rebuild and
# tune each model, predict the validation frame, and record its out-of-sample
# RMSE keyed by the PyCaret model id.
# Fixes: iterate results.index directly instead of range(0, len(results))
# (the id was recomputed three times per iteration), drop the dead
# forecast_actual/forecast_period intermediates, and restore the loop
# indentation lost in the notebook export.
# NOTE(review): forecast_result is overwritten every iteration; the last one
# is kept in case a later notebook cell reads it — confirm before removing.
rmse = pd.DataFrame()
for model_id in results.index:
    candidate = create_model(model_id)
    tuned_model = tune_model(candidate)
    tuned_model_predictions = predict_model(tuned_model, data=df_val_fakturerbartid_variables)
    actual = tuned_model_predictions['Fakturerbar tid (t)']
    predictions = tuned_model_predictions['Label']
    forecast_result = pd.DataFrame(
        list(zip(predictions, actual)),
        columns=['Forecast', 'Faktiske'],
        index=df_val_fakturerbartid_variables.index,
        dtype='float')
    rmse.loc[model_id, 'RMSE'] = mean_squared_error(actual, predictions, squared=False)
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.1548 | 0.0567 | 0.2382 | -0.4090 | 0.1830 | 1.5751 |
| 1 | 0.1844 | 0.0698 | 0.2641 | -0.4062 | 0.1686 | 0.3471 |
| 2 | 0.0894 | 0.0108 | 0.1041 | -1.1870 | 0.0729 | 0.2474 |
| 3 | 0.1457 | 0.0300 | 0.1733 | -2.3084 | 0.1251 | 0.4846 |
| 4 | 0.0993 | 0.0134 | 0.1156 | -0.3933 | 0.0837 | 0.3428 |
| 5 | 0.2389 | 0.0891 | 0.2985 | -1.8054 | 0.2017 | 0.4519 |
| 6 | 0.1253 | 0.0305 | 0.1747 | -0.1381 | 0.1053 | 0.1841 |
| 7 | 0.1028 | 0.0200 | 0.1415 | -0.2131 | 0.0896 | 0.1675 |
| 8 | 0.2161 | 0.0533 | 0.2309 | -0.3301 | 0.1616 | 0.6518 |
| 9 | 0.2583 | 0.0862 | 0.2936 | -0.1717 | 0.1790 | 0.4416 |
| Mean | 0.1615 | 0.0460 | 0.2034 | -0.7362 | 0.1370 | 0.4894 |
| SD | 0.0574 | 0.0277 | 0.0678 | 0.7257 | 0.0447 | 0.3881 |
# Rank candidates by hold-out RMSE, rebuild and re-tune the winner, and
# persist the full pycaret pipeline to 'Fakturerbartid.pkl'.
rmse = rmse.sort_values('RMSE')
winner_id = rmse.index[0]
best_model = create_model(winner_id)
final_model_fakturerbartid = tune_model(best_model)
save_model(final_model_fakturerbartid, 'Fakturerbartid')
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.2301 | 0.0621 | 0.2492 | -0.5414 | 0.1847 | 1.5038 |
| 1 | 0.2128 | 0.0661 | 0.2571 | -0.3328 | 0.1646 | 0.4478 |
| 2 | 0.0704 | 0.0073 | 0.0853 | -0.4680 | 0.0598 | 0.1929 |
| 3 | 0.1337 | 0.0257 | 0.1604 | -1.8355 | 0.1109 | 0.4102 |
| 4 | 0.0883 | 0.0113 | 0.1062 | -0.1763 | 0.0810 | 0.3457 |
| 5 | 0.1501 | 0.0427 | 0.2066 | -0.3447 | 0.1343 | 0.2626 |
| 6 | 0.1678 | 0.0389 | 0.1973 | -0.4523 | 0.1231 | 0.2692 |
| 7 | 0.0967 | 0.0119 | 0.1092 | 0.2767 | 0.0687 | 0.1747 |
| 8 | 0.1207 | 0.0192 | 0.1384 | 0.5218 | 0.0906 | 0.2476 |
| 9 | 0.2526 | 0.0838 | 0.2896 | -0.1397 | 0.1760 | 0.4249 |
| Mean | 0.1523 | 0.0369 | 0.1799 | -0.3492 | 0.1194 | 0.4279 |
| SD | 0.0594 | 0.0252 | 0.0673 | 0.5912 | 0.0427 | 0.3701 |
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None,
steps=[('dtypes',
DataTypes_Auto_infer(categorical_features=[],
display_types=True, features_todrop=[],
id_columns=[], ml_usecase='regression',
numerical_features=[],
target='Fakturerbar tid (t)',
time_features=[])),
('imputer',
Simple_Imputer(categorical_strategy='not_available',
fill_value_categorical=None,
fill_value_numerical=None,
numeri...
('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),
('dfs', 'passthrough'), ('pca', 'passthrough'),
['trained_model',
RANSACRegressor(base_estimator=None, is_data_valid=None,
is_model_valid=None, loss='squared_loss',
max_skips=17, max_trials=7, min_samples=0.5,
random_state=123, residual_threshold=None,
stop_n_inliers=10, stop_probability=0.57,
stop_score=inf)]],
verbose=False),
'Fakturerbartid.pkl')
rmse.index[0]  # pycaret id of the winning model ('ransac' per the output below)
'ransac'
# pycaret regression setup for 'Antal produktionsmedarbejdere (t)':
# the whole training frame is used for fitting (train_size = 1.0) and the
# validation frame is supplied explicitly as test data; CV respects time
# order (fold_strategy = 'timeseries'); the month dummies are declared
# categorical so they are not treated as numeric regressors.
setup_antalp = setup(data = df_train_antalp_variables,
                train_size = 1.0,
                test_data = df_val_antalp_variables,
                target = 'Antal produktionsmedarbejdere (t)',
                fold_strategy = 'timeseries',
                categorical_features = ['Februar (t)','Marts (t)','April (t)','Maj (t)','Juni (t)','Juli (t)',
                                        'August (t)','September (t)','Oktober (t)','November (t)','December (t)'],
                normalize = False,
                transform_target = False,
                session_id=123)
| Description | Value | |
|---|---|---|
| 0 | session_id | 123 |
| 1 | Target | Antal produktionsmedarbejdere (t) |
| 2 | Original Data | (46, 13) |
| 3 | Missing Values | False |
| 4 | Numeric Features | 1 |
| 5 | Categorical Features | 11 |
| 6 | Ordinal Features | False |
| 7 | High Cardinality Features | False |
| 8 | High Cardinality Method | None |
| 9 | Transformed Train Set | (46, 12) |
| 10 | Transformed Test Set | (8, 12) |
| 11 | Shuffle Train-Test | True |
| 12 | Stratify Train-Test | False |
| 13 | Fold Generator | TimeSeriesSplit |
| 14 | Fold Number | 10 |
| 15 | CPU Jobs | -1 |
| 16 | Use GPU | False |
| 17 | Log Experiment | False |
| 18 | Experiment Name | reg-default-name |
| 19 | USI | 8fdd |
| 20 | Imputation Type | simple |
| 21 | Iterative Imputation Iteration | None |
| 22 | Numeric Imputer | mean |
| 23 | Iterative Imputation Numeric Model | None |
| 24 | Categorical Imputer | constant |
| 25 | Iterative Imputation Categorical Model | None |
| 26 | Unknown Categoricals Handling | least_frequent |
| 27 | Normalize | False |
| 28 | Normalize Method | None |
| 29 | Transformation | False |
| 30 | Transformation Method | None |
| 31 | PCA | False |
| 32 | PCA Method | None |
| 33 | PCA Components | None |
| 34 | Ignore Low Variance | False |
| 35 | Combine Rare Levels | False |
| 36 | Rare Level Threshold | None |
| 37 | Numeric Binning | False |
| 38 | Remove Outliers | False |
| 39 | Outliers Threshold | None |
| 40 | Remove Multicollinearity | False |
| 41 | Multicollinearity Threshold | None |
| 42 | Remove Perfect Collinearity | True |
| 43 | Clustering | False |
| 44 | Clustering Iteration | None |
| 45 | Polynomial Features | False |
| 46 | Polynomial Degree | None |
| 47 | Trignometry Features | False |
| 48 | Polynomial Threshold | None |
| 49 | Group Features | False |
| 50 | Feature Selection | False |
| 51 | Feature Selection Method | classic |
| 52 | Features Selection Threshold | None |
| 53 | Feature Interaction | False |
| 54 | Feature Ratio | False |
| 55 | Interaction Threshold | None |
| 56 | Transform Target | False |
| 57 | Transform Target Method | box-cox |
# Cross-validated comparison of all regressors (KNN excluded) ranked by RMSE;
# pull() captures the comparison grid shown below as a DataFrame whose index
# holds the pycaret model ids used by create_model() later.
models = compare_models(sort ='RMSE',exclude =['knn'],turbo = False)
results = pull()
results.Model.tolist()
| Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) | |
|---|---|---|---|---|---|---|---|---|
| lightgbm | Light Gradient Boosting Machine | 0.2217 | 0.0708 | 0.2507 | -4.5556 | 0.1733 | 1.4629 | 0.0170 |
| ard | Automatic Relevance Determination | 0.2163 | 0.0732 | 0.2510 | -3.1399 | 0.1737 | 1.8074 | 0.0120 |
| en | Elastic Net | 0.2219 | 0.0711 | 0.2519 | -4.5722 | 0.1741 | 1.4665 | 0.0120 |
| llar | Lasso Least Angle Regression | 0.2219 | 0.0711 | 0.2519 | -4.5722 | 0.1741 | 1.4665 | 0.0110 |
| lasso | Lasso Regression | 0.2219 | 0.0711 | 0.2519 | -4.5722 | 0.1741 | 1.4665 | 0.0140 |
| ada | AdaBoost Regressor | 0.2248 | 0.0732 | 0.2542 | -2.8617 | 0.1785 | 1.6564 | 0.0460 |
| rf | Random Forest Regressor | 0.2204 | 0.0747 | 0.2547 | -3.3659 | 0.1771 | 1.5858 | 0.1210 |
| kr | Kernel Ridge | 0.2168 | 0.0728 | 0.2583 | -3.9231 | 0.1791 | 1.4592 | 0.0140 |
| br | Bayesian Ridge | 0.2264 | 0.0772 | 0.2583 | -4.6648 | 0.1778 | 1.7561 | 0.0110 |
| ridge | Ridge Regression | 0.2262 | 0.0744 | 0.2587 | -3.9795 | 0.1797 | 1.6614 | 0.0140 |
| svm | Support Vector Regression | 0.2212 | 0.0743 | 0.2589 | -4.1594 | 0.1804 | 1.6507 | 0.0100 |
| omp | Orthogonal Matching Pursuit | 0.2276 | 0.0810 | 0.2657 | -3.4147 | 0.1812 | 1.5940 | 0.0110 |
| huber | Huber Regressor | 0.2204 | 0.0794 | 0.2680 | -3.2422 | 0.1864 | 1.7112 | 0.0220 |
| xgboost | Extreme Gradient Boosting | 0.2375 | 0.0862 | 0.2720 | -3.7561 | 0.1903 | 1.5490 | 0.1010 |
| tr | TheilSen Regressor | 0.2314 | 0.0857 | 0.2779 | -4.1533 | 0.1910 | 1.7441 | 0.3190 |
| lr | Linear Regression | 0.2362 | 0.0872 | 0.2802 | -4.1446 | 0.1922 | 1.7416 | 0.0090 |
| mlp | MLP Regressor | 0.2353 | 0.0896 | 0.2815 | -4.8645 | 0.1943 | 1.8385 | 0.0270 |
| catboost | CatBoost Regressor | 0.2432 | 0.0900 | 0.2853 | -4.8871 | 0.1994 | 1.6889 | 0.3640 |
| gbr | Gradient Boosting Regressor | 0.2484 | 0.0941 | 0.2879 | -5.1773 | 0.2014 | 1.7542 | 0.0260 |
| par | Passive Aggressive Regressor | 0.2449 | 0.0924 | 0.2889 | -3.3545 | 0.2002 | 1.6755 | 0.0120 |
| ransac | Random Sample Consensus | 0.2457 | 0.1583 | 0.2939 | -9.8119 | 0.1802 | 0.8704 | 0.0370 |
| dt | Decision Tree Regressor | 0.2745 | 0.1051 | 0.3051 | -5.2172 | 0.2116 | 1.8125 | 0.0110 |
| et | Extra Trees Regressor | 0.2678 | 0.1063 | 0.3104 | -5.8343 | 0.2149 | 1.8791 | 0.1110 |
| lar | Least Angle Regression | 1.3187 | 16.4087 | 1.5357 | -1262.3369 | 0.3918 | 5.7304 | 0.0110 |
['Light Gradient Boosting Machine', 'Automatic Relevance Determination', 'Elastic Net', 'Lasso Least Angle Regression', 'Lasso Regression', 'AdaBoost Regressor', 'Random Forest Regressor', 'Kernel Ridge', 'Bayesian Ridge', 'Ridge Regression', 'Support Vector Regression', 'Orthogonal Matching Pursuit', 'Huber Regressor', 'Extreme Gradient Boosting', 'TheilSen Regressor', 'Linear Regression', 'MLP Regressor', 'CatBoost Regressor', 'Gradient Boosting Regressor', 'Passive Aggressive Regressor', 'Random Sample Consensus', 'Decision Tree Regressor', 'Extra Trees Regressor', 'Least Angle Regression']
rmse = pd.DataFrame()
# Hold-out RMSE per candidate model for 'Antal produktionsmedarbejdere (t)'.
# Fix (same as the fakturerbartid loop above): the validation index is
# loop-invariant and is hoisted; the dead `forecast_actual` zip-list is inlined.
forecast_period = df_val_antalp_variables.index
for i in range(0, len(results)):
    currentmodelname = results.index[i]
    best_model = create_model(currentmodelname)
    tuned_model = tune_model(best_model)
    tuned_model_predictions = predict_model(tuned_model, data = df_val_antalp_variables)
    actual = tuned_model_predictions['Antal produktionsmedarbejdere (t)']
    predictions = tuned_model_predictions['Label']
    # Forecast vs. actuals frame; only the last iteration's survives the loop.
    forecast_result = pd.DataFrame(list(zip(predictions, actual)),
                                   columns = ['Forecast', 'Faktiske'],
                                   index = forecast_period, dtype='float')
    rmse.loc[currentmodelname, 'RMSE'] = mean_squared_error(actual, predictions, squared=False)
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.2291 | 0.0579 | 0.2406 | -0.9238 | 0.1552 | 0.5250 |
| 1 | 0.4329 | 0.2407 | 0.4906 | -2.8956 | 0.3680 | 11.0646 |
| 2 | 0.3133 | 0.1418 | 0.3766 | -9.9364 | 0.2431 | 1.1274 |
| 3 | 0.1961 | 0.0668 | 0.2585 | -0.6351 | 0.1947 | 1.3882 |
| 4 | 0.1726 | 0.0521 | 0.2284 | -0.7451 | 0.1680 | 0.7529 |
| 5 | 0.2239 | 0.0805 | 0.2837 | -1.0908 | 0.1931 | 0.7194 |
| 6 | 1.0170 | 1.4476 | 1.2032 | -112.2099 | 0.5087 | 1.9271 |
| 7 | 0.2520 | 0.0758 | 0.2753 | -18.3041 | 0.1633 | 0.2959 |
| 8 | 0.2751 | 0.0987 | 0.3141 | -0.0568 | 0.2184 | 1.1260 |
| 9 | 0.1015 | 0.0137 | 0.1171 | 0.3107 | 0.0775 | 0.1761 |
| Mean | 0.3213 | 0.2276 | 0.3788 | -14.6487 | 0.2290 | 1.9103 |
| SD | 0.2465 | 0.4109 | 0.2900 | 33.0007 | 0.1169 | 3.0918 |
# Rank candidates by hold-out RMSE, rebuild and re-tune the winner, and
# persist the full pycaret pipeline to 'Antalp.pkl'.
rmse = rmse.sort_values('RMSE')
winner_id = rmse.index[0]
best_model = create_model(winner_id)
final_model_antalp = tune_model(best_model)
save_model(final_model_antalp, 'Antalp')
| MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
|---|---|---|---|---|---|---|
| 0 | 0.1961 | 0.0400 | 0.1999 | -0.3283 | 0.1296 | 0.4293 |
| 1 | 0.3165 | 0.1589 | 0.3986 | -1.5723 | 0.3130 | 8.7024 |
| 2 | 0.1697 | 0.0348 | 0.1867 | -1.6868 | 0.1372 | 0.6498 |
| 3 | 0.1958 | 0.0513 | 0.2264 | -0.2546 | 0.1759 | 1.4201 |
| 4 | 0.1735 | 0.0334 | 0.1827 | -0.1173 | 0.1379 | 0.7472 |
| 5 | 0.2213 | 0.0636 | 0.2523 | -0.6528 | 0.1694 | 0.5559 |
| 6 | 0.1761 | 0.0453 | 0.2128 | -2.5426 | 0.1369 | 0.2730 |
| 7 | 0.3403 | 0.1212 | 0.3482 | -29.8895 | 0.2082 | 0.4010 |
| 8 | 0.2639 | 0.0996 | 0.3155 | -0.0665 | 0.2093 | 0.9780 |
| 9 | 0.0903 | 0.0083 | 0.0911 | 0.5825 | 0.0616 | 0.1953 |
| Mean | 0.2143 | 0.0656 | 0.2414 | -3.6528 | 0.1679 | 1.4352 |
| SD | 0.0707 | 0.0442 | 0.0857 | 8.7903 | 0.0633 | 2.4465 |
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None,
steps=[('dtypes',
DataTypes_Auto_infer(categorical_features=['Februar (t)',
'Marts (t)',
'April (t)',
'Maj (t)',
'Juni (t)',
'Juli (t)',
'August (t)',
'September (t)',
'Oktober (t)',
'November (t)',
'December (t)'],
display_types=True, features_todrop=[],
id_columns=[], ml_usecase='regression',
numerical_features=[],
target='Antal produktionsmedarbejdere '
'...
RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
criterion='mse', max_depth=6,
max_features='log2', max_leaf_nodes=None,
max_samples=None,
min_impurity_decrease=0.0002,
min_impurity_split=None,
min_samples_leaf=2, min_samples_split=2,
min_weight_fraction_leaf=0.0,
n_estimators=290, n_jobs=-1,
oob_score=False, random_state=123,
verbose=0, warm_start=False)]],
verbose=False),
'Antalp.pkl')
rmse.index[0]  # pycaret id of the winning model ('rf' per the output below)
'rf'
# Holiday share of the year for the two forecast years 2021 and 2022.
# Fix 1: the original read the identical Excel sheet twice; a copy of the
# first read is equivalent and halves the file I/O.
# Fix 2: DataFrame.append was deprecated and removed in pandas 2.0;
# pd.concat with the default ignore_index=False reproduces its behaviour.
df_ferie1 = pd.read_excel (r'C:\Users\jupe\Desktop\Praktik\210\210 - Forecast input.xlsx',sheet_name='Ferie andel af året',
                           skiprows = 2)
df_ferie1 = df_ferie1.drop(['Måned'], axis = 1)
df_ferie2 = df_ferie1.copy()
df_ferie = pd.concat([df_ferie1, df_ferie2])
# One row per forecast month; parsed to month-start timestamps and indexed.
df_ferie['Periode'] = ['2021 - 01','2021 - 02','2021 - 03','2021 - 04','2021 - 05','2021 - 06','2021 - 07','2021 - 08',
                       '2021 - 09','2021 - 10','2021 - 11','2021 - 12','2022 - 01','2022 - 02','2022 - 03','2022 - 04',
                       '2022 - 05','2022 - 06','2022 - 07','2022 - 08','2022 - 09','2022 - 10','2022 - 11','2022 - 12']
df_ferie.Periode = pd.to_datetime(df_ferie.Periode)
df_ferie = df_ferie.set_index('Periode')
arbejdsdage = {'Arbejdsdage':[21,20,23,22,21,22,22,22,22,21,22,23,21,20,23,21,22,22,21,23,22,21,22,22],
'Periode':['2021 - 01','2021 - 02','2021 - 03','2021 - 04','2021 - 05','2021 - 06','2021 - 07','2021 - 08',
'2021 - 09','2021 - 10','2021 - 11','2021 - 12','2022 - 01','2022 - 02','2022 - 03','2022 - 04',
'2022 - 05','2022 - 06','2022 - 07','2022 - 08','2022 - 09','2022 - 10','2022 - 11','2022 - 12']}
df_arbejdsdage = pd.DataFrame(arbejdsdage)
df_arbejdsdage.Periode = pd.to_datetime(df_arbejdsdage.Periode)
df_arbejdsdage = df_arbejdsdage.set_index('Periode')
# Effective working days per forecast month: working days minus a holiday
# deduction, then min-max scaled with the TRAINING min/max so the forecast
# feature lives on the same scale the models were fitted on.
# NOTE(review): the *30 factor presumably converts the yearly holiday share
# into withheld days for the month — confirm against the training pipeline.
df_arbejdsdage_ferie_forecast = df_arbejdsdage.merge(df_ferie,left_index = True, right_index = True)
df_arbejdsdage_ferie_forecast['Arbejdsdage - ferie (t)']= df_arbejdsdage_ferie_forecast['Arbejdsdage']-(df_arbejdsdage_ferie_forecast['Gennemsnit af Ferie andel af året']*30)
df_arbejdsdage_ferie_forecast = df_arbejdsdage_ferie_forecast.drop(['Gennemsnit af Ferie andel af året','Arbejdsdage'], axis = 1)
df_arbejdsdage_ferie_forecast = (df_arbejdsdage_ferie_forecast-arbejdsdage_train_min)/(arbejdsdage_train_max-arbejdsdage_train_min)
# 24-month forecast horizon (Jan 2021 - Dec 2022) as a 'Periode' column of
# month-start timestamps; reused as the index of every forecast frame below.
df_forecast_periode = pd.DataFrame()
df_forecast_periode['Periode'] = [f'{year} - {month:02d}'
                                  for year in (2021, 2022)
                                  for month in range(1, 13)]
df_forecast_periode.Periode = pd.to_datetime(df_forecast_periode.Periode)
def _lag_forecast(df_val, col):
    """Return (first-row lag frame, forecast frame seeded with that lag).

    Extracts the first validation row of *col* — the only (t-1) value known
    at the start of the horizon — as a one-row DataFrame, and left-joins it
    onto the 24-month forecast index so just the first month is pre-filled;
    the recursive forecast loop below supplies the remaining lags.
    """
    lag = pd.DataFrame(pd.DataFrame(df_val[[col]]).iloc[0]).transpose()
    seeded = df_forecast_periode.set_index('Periode')
    seeded = seeded.merge(lag, how='left', left_index=True, right_index=True)
    return lag, seeded

# Fix: six verbatim copies of the same extract/seed sequence collapsed into
# one helper; every module-level name and value is unchanged.
omsætning_t_1, omsætning_t_1_forecast = _lag_forecast(df_val_omsætning_variables, 'Omsætning (t-1)')
vareforbrug_t_1, vareforbrug_t_1_forecast = _lag_forecast(df_val_omsætning_variables, 'Vareforbrug (t-1)')
lønforbrug_t_1, lønforbrug_t_1_forecast = _lag_forecast(df_val_lønforbrug_variables, 'Lønforbrug (t-1)')
faktureringsgrad_t_1, faktureringsgrad_t_1_forecast = _lag_forecast(df_val_faktureringsgrad_variables, 'Faktureringsgrad (t-1)')
fakturerbartid_t_1, fakturerbartid_t_1_forecast = _lag_forecast(df_val_fakturerbartid_variables, 'Fakturerbar tid (t-1)')
antalp_t_1, antalp_t_1_forecast = _lag_forecast(df_val_antalp_variables, 'Antal produktionsmedarbejdere (t-1)')
# One-hot month indicators for the forecast horizon. January is dropped as
# the reference category and the remaining columns are renamed to the Danish
# month names used by the trained models.
monthly_forecast_dummies = df_forecast_periode.set_index('Periode')
monthly_forecast_dummies['Måned (t)'] = monthly_forecast_dummies.index.month
dummies = pd.get_dummies(monthly_forecast_dummies['Måned (t)']).drop(1, axis=1)
dummies.columns = ['Februar (t)', 'Marts (t)', 'April (t)', 'Maj (t)', 'Juni (t)',
                   'Juli (t)', 'August (t)', 'September (t)', 'Oktober (t)',
                   'November (t)', 'December (t)']
monthly_forecast_dummies = monthly_forecast_dummies.drop('Måned (t)', axis=1).join(dummies)
# Calendar-year regressor per forecast month.
# NOTE(review): not referenced again in this chunk — confirm it feeds a
# model input elsewhere.
yearly_forecast_variable = df_forecast_periode
yearly_forecast_variable = yearly_forecast_variable.set_index('Periode')
yearly_forecast_variable['År (t)'] = yearly_forecast_variable.index.year
# One feature frame per model for the recursive forecast. Columns named in
# `columns=` start as NaN and are filled inside the loop; lag columns are
# pre-seeded with the first known (t-1) value; month dummies / calendar
# features are merged in up front. The column insertion order is kept
# exactly as in training, since the saved pipelines expect it.
df_forecast_omsætning = pd.DataFrame(columns = ['Omsætning (t)','Vareforbrug (t)','Jobtimer (t)'],
                           index = df_forecast_periode.Periode)
df_forecast_omsætning['Omsætning (t-1)'] = omsætning_t_1_forecast
df_forecast_omsætning['Vareforbrug (t-1)'] = vareforbrug_t_1_forecast
df_forecast_vareforbrug = pd.DataFrame(columns = ['Vareforbrug (t)','Jobtimer (t)','Antal produktionsmedarbejdere (t)',
                                                  'Lønforbrug (t)'],
                           index = df_forecast_periode.Periode)
df_forecast_vareforbrug['Omsætning (t-1)'] = omsætning_t_1_forecast
df_forecast_vareforbrug['Vareforbrug (t-1)'] = vareforbrug_t_1_forecast
df_forecast_lønforbrug = pd.DataFrame(columns = ['Lønforbrug (t)','Jobtimer (t)','Antal produktionsmedarbejdere (t)'],
                           index = df_forecast_periode.Periode)
df_forecast_lønforbrug['Lønforbrug (t-1)'] = lønforbrug_t_1_forecast
df_forecast_lønforbrug['Arbejdsdage - ferie (t)'] = df_arbejdsdage_ferie_forecast
df_forecast_faktureringsgrad = pd.DataFrame(columns = ['Faktureringsgrad (t)'],index = df_forecast_periode.Periode)
df_forecast_faktureringsgrad = df_forecast_faktureringsgrad.merge(monthly_forecast_dummies,left_index = True,
                                                                  right_index = True)
df_forecast_faktureringsgrad['Faktureringsgrad (t-1)'] = faktureringsgrad_t_1_forecast
df_forecast_faktureringsgrad['Arbejdsdage - ferie (t)'] = df_arbejdsdage_ferie_forecast
df_forecast_fakturerbartid = pd.DataFrame(columns = ['Fakturerbar tid (t)','Antal produktionsmedarbejdere (t)'],
                           index = df_forecast_periode.Periode)
df_forecast_fakturerbartid['Fakturerbar tid (t-1)'] = fakturerbartid_t_1_forecast
df_forecast_fakturerbartid['Arbejdsdage - ferie (t)'] = df_arbejdsdage_ferie_forecast
df_forecast_antalp = pd.DataFrame(columns = ['Antal produktionsmedarbejdere (t)'],
                           index = df_forecast_periode.Periode)
df_forecast_antalp['Antal produktionsmedarbejdere (t-1)'] = antalp_t_1_forecast
df_forecast_antalp = df_forecast_antalp.merge(monthly_forecast_dummies,left_index = True,right_index = True)
# Reload the six persisted pycaret pipelines from disk.
_model_dir = r'C:\Users\jupe\Desktop\Praktik\210'
final_model_omsætning = load_model(_model_dir + '\\Omsætning')
final_model_vareforbrug = load_model(_model_dir + '\\Vareforbrug')
final_model_lønforbrug = load_model(_model_dir + '\\Lønforbrug')
final_model_faktureringsgrad = load_model(_model_dir + '\\Faktureringsgrad')
final_model_fakturerbartid = load_model(_model_dir + '\\Fakturerbartid')
final_model_antalp = load_model(_model_dir + '\\Antalp')
Transformation Pipeline and Model Successfully Loaded Transformation Pipeline and Model Successfully Loaded Transformation Pipeline and Model Successfully Loaded Transformation Pipeline and Model Successfully Loaded Transformation Pipeline and Model Successfully Loaded Transformation Pipeline and Model Successfully Loaded
# Recursive 24-month forecast. Each month the six models are called in
# dependency order (antal p. -> fakturerbar tid -> faktureringsgrad ->
# jobtimer (derived) -> lønforbrug -> vareforbrug -> omsætning) and every
# prediction is written back into the feature frames so month i+1 can use
# it as its (t-1) lag. Statement order is load-bearing throughout.
for i in range (0,len(df_forecast_omsætning)):
    Måned_år = df_forecast_periode.Periode[i]
    if i > 0:
        # Copy last month's predictions into this month's lag features.
        Sidste_måned_år = df_forecast_periode.Periode[i-1]
        sidste_måned_antalp = np.array(df_forecast_antalp.loc[Sidste_måned_år,'Antal produktionsmedarbejdere (t)'])
        sidste_måned_vareforbrug = np.array(df_forecast_vareforbrug.loc[Sidste_måned_år,'Vareforbrug (t)'])
        sidste_måned_omsætning = np.array(df_forecast_omsætning.loc[Sidste_måned_år,'Omsætning (t)'])
        sidste_måned_lønforbrug = np.array(df_forecast_lønforbrug.loc[Sidste_måned_år,'Lønforbrug (t)'])
        sidste_måned_jobtimer = np.array(df_forecast_lønforbrug.loc[Sidste_måned_år,'Jobtimer (t)'])
        sidste_måned_faktureringsgrad = np.array(df_forecast_faktureringsgrad.loc[Sidste_måned_år,'Faktureringsgrad (t)'])
        sidste_måned_fakturerbartid = np.array(df_forecast_fakturerbartid.loc[Sidste_måned_år,'Fakturerbar tid (t)'])
        df_forecast_omsætning.loc[Måned_år,'Omsætning (t-1)'] = sidste_måned_omsætning
        df_forecast_omsætning.loc[Måned_år,'Vareforbrug (t-1)'] = sidste_måned_vareforbrug
        df_forecast_lønforbrug.loc[Måned_år,'Lønforbrug (t-1)'] = sidste_måned_lønforbrug
        df_forecast_faktureringsgrad.loc[Måned_år,'Faktureringsgrad (t-1)'] = sidste_måned_faktureringsgrad
        df_forecast_fakturerbartid.loc[Måned_år,'Fakturerbar tid (t-1)'] = sidste_måned_fakturerbartid
        df_forecast_antalp.loc[Måned_år,'Antal produktionsmedarbejdere (t-1)'] = sidste_måned_antalp
    # Step 1: number of production employees; row i of the prediction frame
    # (last column = pycaret's 'Label') is this month's value.
    predictions_antal_p = predict_model(final_model_antalp, data = df_forecast_antalp)
    predicted_antal_p = predictions_antal_p.iloc[i,-1]
    df_forecast_antalp.loc[Måned_år,'Antal produktionsmedarbejdere (t)'] = predicted_antal_p
    df_forecast_fakturerbartid.loc[Måned_år,'Antal produktionsmedarbejdere (t)'] = predicted_antal_p
    # Step 2: billable hours (scaled/differenced/log space).
    predictions_fakturerbartid = predict_model(final_model_fakturerbartid, data = df_forecast_fakturerbartid)
    predicted_fakturerbartid = predictions_fakturerbartid.iloc[i,-1]
    df_forecast_fakturerbartid.loc[Måned_år,'Fakturerbar tid (t)'] = predicted_fakturerbartid
    # Step 3: billing rate; immediately un-scaled because it is needed to
    # derive job hours on the original scale below.
    predictions_faktureringsgrad = predict_model(final_model_faktureringsgrad, data = df_forecast_faktureringsgrad)
    predicted_faktureringsgrad = predictions_faktureringsgrad.iloc[i,-1]
    df_forecast_faktureringsgrad.loc[Måned_år,'Faktureringsgrad (t)'] = predicted_faktureringsgrad
    predicted_faktureringsgrad_unscaled = predicted_faktureringsgrad*(faktureringsgrad_train_max-faktureringsgrad_train_min)+faktureringsgrad_train_min
    # Deseasonalised log-level of last month's billable hours: for i == 0 the
    # Dec-2020 actual is the anchor; afterwards the previous iteration's
    # `predicted_fakturerbartid_unscaled` is reused (it is reassigned only
    # AFTER this branch, so here it still holds month i-1's value).
    # NOTE(review): assumes `df` holds the full historical series at this
    # point and periodic_estimate_fakturerbartid indexes positionally — confirm.
    if i == 0:
        fakturerbartid_t_1 = df['Fakturerbar tid (t)']
        fakturerbartid_t_1 = fakturerbartid_t_1.loc['2020-12-01 00:00:00']
        fakturerbartid_t_1 = np.log(fakturerbartid_t_1)
        fakturerbartid_t_1 = fakturerbartid_t_1-periodic_estimate_fakturerbartid[11:12].values
    else:
        fakturerbartid_t_1 = predicted_fakturerbartid_unscaled
        fakturerbartid_t_1 = np.log(fakturerbartid_t_1)
        fakturerbartid_t_1 = fakturerbartid_t_1-periodic_estimate_fakturerbartid[i-1]
    # Undo min-max scaling, undo the first difference, re-add seasonality,
    # and leave log space — billable hours on the original scale.
    predicted_fakturerbartid_unscaled = predicted_fakturerbartid*(fakturerbartid_train_max-fakturerbartid_train_min)+fakturerbartid_train_min
    predicted_fakturerbartid_unscaled = predicted_fakturerbartid_unscaled+fakturerbartid_t_1
    predicted_fakturerbartid_unscaled = predicted_fakturerbartid_unscaled+periodic_estimate_fakturerbartid[i]
    predicted_fakturerbartid_unscaled = np.exp(predicted_fakturerbartid_unscaled)
    # Same anchoring scheme for last month's deseasonalised job hours.
    if i == 0:
        jobtimer_t_1 = df['Jobtimer (t)']
        jobtimer_t_1 = jobtimer_t_1.loc['2020-12-01 00:00:00']
        jobtimer_t_1 = jobtimer_t_1-periodic_estimate_jobtimer[11:12].values
    else:
        jobtimer_t_1 = predicted_jobtimer_unscaled
        jobtimer_t_1 = jobtimer_t_1-periodic_estimate_jobtimer[i-1]
    # Step 4: job hours are derived, not modelled: billable hours / billing
    # rate; then re-transformed (deseasonalise, difference, min-max scale)
    # so they can feed the lønforbrug / vareforbrug / omsætning models.
    predicted_jobtimer_unscaled = predicted_fakturerbartid_unscaled/predicted_faktureringsgrad_unscaled
    predicted_jobtimer = predicted_jobtimer_unscaled-periodic_estimate_jobtimer[i]
    predicted_jobtimer = predicted_jobtimer-jobtimer_t_1
    predicted_jobtimer = (predicted_jobtimer-jobtimer_train_min)/(jobtimer_train_max-jobtimer_train_min)
    df_forecast_lønforbrug.loc[Måned_år,'Jobtimer (t)'] = predicted_jobtimer
    df_forecast_lønforbrug.loc[Måned_år,'Antal produktionsmedarbejdere (t)'] = predicted_antal_p
    # Step 5: wage spend.
    predictions_lønforbrug = predict_model(final_model_lønforbrug, data = df_forecast_lønforbrug)
    predicted_lønforbrug = predictions_lønforbrug.iloc[i,-1]
    df_forecast_lønforbrug.loc[Måned_år,'Lønforbrug (t)'] = predicted_lønforbrug
    df_forecast_vareforbrug.loc[Måned_år,'Jobtimer (t)'] = predicted_jobtimer
    df_forecast_vareforbrug.loc[Måned_år,'Lønforbrug (t)'] = predicted_lønforbrug
    # Step 6: material spend.
    predictions_vareforbrug = predict_model(final_model_vareforbrug, data = df_forecast_vareforbrug)
    predicted_vareforbrug = predictions_vareforbrug.iloc[i,-1]
    df_forecast_vareforbrug.loc[Måned_år,'Vareforbrug (t)'] = predicted_vareforbrug
    df_forecast_omsætning.loc[Måned_år,'Vareforbrug (t)'] = predicted_vareforbrug
    df_forecast_omsætning.loc[Måned_år,'Jobtimer (t)'] = predicted_jobtimer
    # Step 7: revenue.
    predictions_omsætning = predict_model(final_model_omsætning, data = df_forecast_omsætning)
    predicted_omsætning = predictions_omsætning.iloc[i,-1]
    df_forecast_omsætning.loc[Måned_år,'Omsætning (t)'] = predicted_omsætning
# Gather the six recursive forecasts into one frame (values are still on the
# scaled/differenced model scale; the loops below invert the transforms).
df_forecast = pd.DataFrame({
    'Omsætning (t)': df_forecast_omsætning['Omsætning (t)'],
    'Vareforbrug (t)': df_forecast_vareforbrug['Vareforbrug (t)'],
    'Lønforbrug (t)': df_forecast_lønforbrug['Lønforbrug (t)'],
    'Faktureringsgrad (t)': df_forecast_faktureringsgrad['Faktureringsgrad (t)'],
    'Fakturerbar tid (t)': df_forecast_fakturerbartid['Fakturerbar tid (t)'],
    'Antal produktionsmedarbejdere (t)': df_forecast_antalp['Antal produktionsmedarbejdere (t)'],
    'Jobtimer (t)': df_forecast_vareforbrug['Jobtimer (t)'],
})
# Revenue back to original scale: undo training min-max scaling, then walk
# forward undoing the first difference (Dec-2020 actual anchors month 0) and
# re-adding the seasonal component.
# NOTE(review): periodic_estimate_omsætning[i] is indexed positionally for
# i in 0..23 — confirm the seasonal series is aligned with the horizon.
df_forecast['Omsætning (t)'] = df_forecast['Omsætning (t)']*(omsætning_train_max-omsætning_train_min)+omsætning_train_min
for i in range (0,len(df_forecast)):
    Måned_år = df_forecast.index[i]
    if i == 0:
        omsætning2020_12 = df['Omsætning (t)']
        omsætning2020_12 = omsætning2020_12.loc['2020-12-01 00:00:00']
        omsætning2020_12 = omsætning2020_12-periodic_estimate_omsætning[11:12].values
        df_forecast.loc[Måned_år,'Omsætning (t)'] = omsætning2020_12+df_forecast['Omsætning (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Omsætning (t)']= df_forecast['Omsætning (t)'].iloc[i]+periodic_estimate_omsætning[i]
    else:
        # Deseasonalised level of the previous (already reconstructed) month.
        omsætning_t_1 = df_forecast['Omsætning (t)'].iloc[i-1]-periodic_estimate_omsætning[i-1]
        df_forecast.loc[Måned_år,'Omsætning (t)'] = omsætning_t_1+df_forecast['Omsætning (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Omsætning (t)'] = df_forecast['Omsætning (t)'].iloc[i]+periodic_estimate_omsætning[i]
# Material spend back to original scale: same inverse pipeline as revenue —
# undo min-max scaling, then undo differencing and re-add seasonality month
# by month, anchored at the Dec-2020 actual.
df_forecast['Vareforbrug (t)'] = df_forecast['Vareforbrug (t)']*(vareforbrug_train_max-vareforbrug_train_min)+vareforbrug_train_min
for i in range (0,len(df_forecast)):
    Måned_år = df_forecast.index[i]
    if i == 0:
        vareforbrug2020_12 = df['Vareforbrug (t)']
        vareforbrug2020_12 = vareforbrug2020_12.loc['2020-12-01 00:00:00']
        vareforbrug2020_12 = vareforbrug2020_12-periodic_estimate_vareforbrug[11:12].values
        df_forecast.loc[Måned_år,'Vareforbrug (t)'] = vareforbrug2020_12+df_forecast['Vareforbrug (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Vareforbrug (t)']= df_forecast['Vareforbrug (t)'].iloc[i]+periodic_estimate_vareforbrug[i]
    else:
        vareforbrug_t_1 = df_forecast['Vareforbrug (t)'].iloc[i-1]-periodic_estimate_vareforbrug[i-1]
        df_forecast.loc[Måned_år,'Vareforbrug (t)'] = vareforbrug_t_1+df_forecast['Vareforbrug (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Vareforbrug (t)'] = df_forecast['Vareforbrug (t)'].iloc[i]+periodic_estimate_vareforbrug[i]
# Wage spend back to original scale: undo min-max scaling, then undo
# differencing and re-add seasonality, anchored at the Dec-2020 actual.
df_forecast['Lønforbrug (t)'] = df_forecast['Lønforbrug (t)']*(lønforbrug_train_max-lønforbrug_train_min)+lønforbrug_train_min
for i in range (0,len(df_forecast)):
    Måned_år = df_forecast.index[i]
    if i == 0:
        lønforbrug2020_12 = df['Lønforbrug (t)']
        lønforbrug2020_12 = lønforbrug2020_12.loc['2020-12-01 00:00:00']
        lønforbrug2020_12 = lønforbrug2020_12-periodic_estimate_lønforbrug[11:12].values
        df_forecast.loc[Måned_år,'Lønforbrug (t)'] = lønforbrug2020_12+df_forecast['Lønforbrug (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Lønforbrug (t)']= df_forecast['Lønforbrug (t)'].iloc[i]+periodic_estimate_lønforbrug[i]
    else:
        lønforbrug_t_1 = df_forecast['Lønforbrug (t)'].iloc[i-1]-periodic_estimate_lønforbrug[i-1]
        df_forecast.loc[Måned_år,'Lønforbrug (t)'] = lønforbrug_t_1+df_forecast['Lønforbrug (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Lønforbrug (t)'] = df_forecast['Lønforbrug (t)'].iloc[i]+periodic_estimate_lønforbrug[i]
# Billing rate needs only the inverse min-max scaling (it was modelled as a
# level, not a difference).
df_forecast['Faktureringsgrad (t)'] = df_forecast['Faktureringsgrad (t)']*(faktureringsgrad_train_max-faktureringsgrad_train_min)+faktureringsgrad_train_min
# Billable hours were modelled in LOG space: undo min-max scaling, then per
# month undo the log-difference, re-add seasonality, and np.exp back to the
# original scale (Dec-2020 actual anchors month 0).
df_forecast['Fakturerbar tid (t)'] = df_forecast['Fakturerbar tid (t)']*(fakturerbartid_train_max-fakturerbartid_train_min)+fakturerbartid_train_min
for i in range (0,len(df_forecast)):
    Måned_år = df_forecast.index[i]
    if i == 0:
        fakturerbartid2020_12 = df['Fakturerbar tid (t)']
        fakturerbartid2020_12 = fakturerbartid2020_12.loc['2020-12-01 00:00:00']
        fakturerbartid2020_12 = np.log(fakturerbartid2020_12)
        fakturerbartid2020_12 = fakturerbartid2020_12-periodic_estimate_fakturerbartid[11:12].values
        df_forecast.loc[Måned_år,'Fakturerbar tid (t)'] = fakturerbartid2020_12+df_forecast['Fakturerbar tid (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Fakturerbar tid (t)']= df_forecast['Fakturerbar tid (t)'].iloc[i]+periodic_estimate_fakturerbartid[i]
        df_forecast.loc[Måned_år,'Fakturerbar tid (t)']= np.exp(df_forecast['Fakturerbar tid (t)'].iloc[i])
    else:
        # Previous month is already in level space — back to deseasonalised log.
        fakturerbartid_t_1 = np.log(df_forecast['Fakturerbar tid (t)'].iloc[i-1])
        fakturerbartid_t_1 = fakturerbartid_t_1 -periodic_estimate_fakturerbartid[i-1]
        df_forecast.loc[Måned_år,'Fakturerbar tid (t)'] = fakturerbartid_t_1+df_forecast['Fakturerbar tid (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Fakturerbar tid (t)'] = df_forecast['Fakturerbar tid (t)'].iloc[i]+periodic_estimate_fakturerbartid[i]
        df_forecast.loc[Måned_år,'Fakturerbar tid (t)']= np.exp(df_forecast['Fakturerbar tid (t)'].iloc[i])
# Production-employee count, also modelled in LOG space: undo min-max
# scaling, then undo the log-difference, re-add seasonality and np.exp,
# anchored at the Dec-2020 actual.
df_forecast['Antal produktionsmedarbejdere (t)'] = df_forecast['Antal produktionsmedarbejdere (t)']*(antalp_train_max-antalp_train_min)+antalp_train_min
for i in range (0,len(df_forecast)):
    Måned_år = df_forecast.index[i]
    if i == 0:
        antalp2020_12 = df['Antal produktionsmedarbejdere (t)']
        antalp2020_12 = antalp2020_12.loc['2020-12-01 00:00:00']
        antalp2020_12 = np.log(antalp2020_12)
        antalp2020_12 = antalp2020_12-periodic_estimate_antalp[11:12].values
        df_forecast.loc[Måned_år,'Antal produktionsmedarbejdere (t)'] = antalp2020_12+df_forecast['Antal produktionsmedarbejdere (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Antal produktionsmedarbejdere (t)']= df_forecast['Antal produktionsmedarbejdere (t)'].iloc[i]+periodic_estimate_antalp[i]
        df_forecast.loc[Måned_år,'Antal produktionsmedarbejdere (t)']= np.exp(df_forecast['Antal produktionsmedarbejdere (t)'].iloc[i])
    else:
        antalp_t_1 = np.log(df_forecast['Antal produktionsmedarbejdere (t)'].iloc[i-1])
        antalp_t_1 = antalp_t_1-periodic_estimate_antalp[i-1]
        df_forecast.loc[Måned_år,'Antal produktionsmedarbejdere (t)'] = antalp_t_1+df_forecast['Antal produktionsmedarbejdere (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Antal produktionsmedarbejdere (t)'] = df_forecast['Antal produktionsmedarbejdere (t)'].iloc[i]+periodic_estimate_antalp[i]
        df_forecast.loc[Måned_år,'Antal produktionsmedarbejdere (t)']= np.exp(df_forecast['Antal produktionsmedarbejdere (t)'].iloc[i])
# Job hours back to original scale: undo min-max scaling, then undo
# differencing and re-add seasonality, anchored at the Dec-2020 actual
# (linear space — no log transform for this series).
df_forecast['Jobtimer (t)'] = df_forecast['Jobtimer (t)']*(jobtimer_train_max-jobtimer_train_min)+jobtimer_train_min
for i in range (0,len(df_forecast)):
    Måned_år = df_forecast.index[i]
    if i == 0:
        jobtimer2020_12 = df['Jobtimer (t)']
        jobtimer2020_12 = jobtimer2020_12.loc['2020-12-01 00:00:00']
        jobtimer2020_12 = jobtimer2020_12-periodic_estimate_jobtimer[11:12].values
        df_forecast.loc[Måned_år,'Jobtimer (t)'] = jobtimer2020_12+df_forecast['Jobtimer (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Jobtimer (t)']= df_forecast['Jobtimer (t)'].iloc[i]+periodic_estimate_jobtimer[i]
    else:
        jobtimer_t_1 = df_forecast['Jobtimer (t)'].iloc[i-1]-periodic_estimate_jobtimer[i-1]
        df_forecast.loc[Måned_år,'Jobtimer (t)'] = jobtimer_t_1+df_forecast['Jobtimer (t)'].iloc[i]
        df_forecast.loc[Måned_år,'Jobtimer (t)'] = df_forecast['Jobtimer (t)'].iloc[i]+periodic_estimate_jobtimer[i]
# Contribution margin = revenue - materials - wages; compare forecast against
# realised values on the overlapping validation months and report RMSE/means.
df_forecast['Dækningsbidrag (t)'] = (df_forecast['Omsætning (t)']
                                     - df_forecast['Vareforbrug (t)']
                                     - df_forecast['Lønforbrug (t)'])
df_forecast_val_dækningsbidrag = pd.DataFrame(df_forecast['Dækningsbidrag (t)']).rename(
    columns={'Dækningsbidrag (t)': 'Dækningsbidrag forecast'})
df_forecast_val_dækningsbidrag['Dækningsbidrag realiseret'] = (df_val_copy['Omsætning (t)']
                                                               - df_val_copy['Vareforbrug (t)']
                                                               - df_val_copy['Lønforbrug (t)'])
# dropna keeps only months where realised figures exist.
df_forecast_val_dækningsbidrag = df_forecast_val_dækningsbidrag.dropna(axis=0)
df_forecast_val_dækningsbidrag
dækningsbidrag_val_rmse = mean_squared_error(df_forecast_val_dækningsbidrag['Dækningsbidrag realiseret'],
                                             df_forecast_val_dækningsbidrag['Dækningsbidrag forecast'],
                                             squared=False)
realiseretgennemsnitvaldækningsbidrag = np.mean(df_forecast_val_dækningsbidrag['Dækningsbidrag realiseret'])
forecastgennemsnitvaldækningsbidrag = np.mean(df_forecast_val_dækningsbidrag['Dækningsbidrag forecast'])
print(f"val RMSE dækningsbidrag: {dækningsbidrag_val_rmse} \nval gennemsnit realiseret dækningsbidrag: {realiseretgennemsnitvaldækningsbidrag} \nval gennemsnit forecast dækningsbidrag: {forecastgennemsnitvaldækningsbidrag}")
val RMSE dækningsbidrag: 529569.8530962383 val gennemsnit realiseret dækningsbidrag: 1254598.0187500003 val gennemsnit forecast dækningsbidrag: 1749835.2960194945
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_val_dækningsbidrag)
ax.set(xlabel="År og måned",
ylabel="Dækningsbidrag (mdkk)",
title="Dækningsbidrag forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Dækningsbidrag forecast','Dækningsbidrag realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Validation: Omsætning (revenue) forecast vs realised ---
df_forecast_val_omsætning = pd.DataFrame(df_forecast['Omsætning (t)'])
df_forecast_val_omsætning = df_forecast_val_omsætning.rename(
    columns={'Omsætning (t)': 'Omsætning forecast'})
df_forecast_val_omsætning['Omsætning realiseret'] = df_val_copy['Omsætning (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_val_omsætning = df_forecast_val_omsætning.dropna(axis=0)
df_forecast_val_omsætning  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
omsætning_val_rmse = np.sqrt(mean_squared_error(
    df_forecast_val_omsætning['Omsætning realiseret'],
    df_forecast_val_omsætning['Omsætning forecast']))
realiseretgennemsnitvalomsætning = np.mean(df_forecast_val_omsætning['Omsætning realiseret'])
forecastgennemsnitvalomsætning = np.mean(df_forecast_val_omsætning['Omsætning forecast'])
print(f"val RMSE omsætning: {omsætning_val_rmse} \nval gennemsnit realiseret omsætning: {realiseretgennemsnitvalomsætning} \nval gennemsnit forecast omsætning: {forecastgennemsnitvalomsætning}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# val RMSE omsætning: 581748.2262998134 val gennemsnit realiseret omsætning: 4739817.2375 val gennemsnit forecast omsætning: 5069144.685099592
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_val_omsætning)
ax.set(xlabel="År og måned",
       ylabel="Omsætning (mdkk)",
       title="Omsætning forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Omsætning forecast','Omsætning realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Validation: Vareforbrug (cost of goods) forecast vs realised ---
df_forecast_val_vareforbrug = pd.DataFrame(df_forecast['Vareforbrug (t)'])
df_forecast_val_vareforbrug = df_forecast_val_vareforbrug.rename(
    columns={'Vareforbrug (t)': 'Vareforbrug forecast'})
df_forecast_val_vareforbrug['Vareforbrug realiseret'] = df_val_copy['Vareforbrug (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_val_vareforbrug = df_forecast_val_vareforbrug.dropna(axis=0)
df_forecast_val_vareforbrug  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
vareforbrug_val_rmse = np.sqrt(mean_squared_error(
    df_forecast_val_vareforbrug['Vareforbrug realiseret'],
    df_forecast_val_vareforbrug['Vareforbrug forecast']))
realiseretgennemsnitvalvareforbrug = np.mean(df_forecast_val_vareforbrug['Vareforbrug realiseret'])
forecastgennemsnitvalvareforbrug = np.mean(df_forecast_val_vareforbrug['Vareforbrug forecast'])
print(f"val RMSE vareforbrug: {vareforbrug_val_rmse} \nval gennemsnit realiseret vareforbrug: {realiseretgennemsnitvalvareforbrug} \nval gennemsnit forecast vareforbrug: {forecastgennemsnitvalvareforbrug}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# val RMSE vareforbrug: 344589.0722096242 val gennemsnit realiseret vareforbrug: 1372559.0675 val gennemsnit forecast vareforbrug: 1161600.9062907647
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_val_vareforbrug)
ax.set(xlabel="År og måned",
       ylabel="Vareforbrug (mdkk)",
       title="Vareforbrug forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Vareforbrug forecast','Vareforbrug realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Validation: Lønforbrug (labour cost) forecast vs realised ---
df_forecast_val_lønforbrug = pd.DataFrame(df_forecast['Lønforbrug (t)'])
df_forecast_val_lønforbrug = df_forecast_val_lønforbrug.rename(
    columns={'Lønforbrug (t)': 'Lønforbrug forecast'})
df_forecast_val_lønforbrug['Lønforbrug realiseret'] = df_val_copy['Lønforbrug (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_val_lønforbrug = df_forecast_val_lønforbrug.dropna(axis=0)
df_forecast_val_lønforbrug  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
lønforbrug_val_rmse = np.sqrt(mean_squared_error(
    df_forecast_val_lønforbrug['Lønforbrug realiseret'],
    df_forecast_val_lønforbrug['Lønforbrug forecast']))
realiseretgennemsnitvallønforbrug = np.mean(df_forecast_val_lønforbrug['Lønforbrug realiseret'])
forecastgennemsnitvallønforbrug = np.mean(df_forecast_val_lønforbrug['Lønforbrug forecast'])
print(f"val RMSE lønforbrug: {lønforbrug_val_rmse} \nval gennemsnit realiseret lønforbrug: {realiseretgennemsnitvallønforbrug} \nval gennemsnit forecast lønforbrug: {forecastgennemsnitvallønforbrug}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# val RMSE lønforbrug: 146627.09003546895 val gennemsnit realiseret lønforbrug: 2112660.15125 val gennemsnit forecast lønforbrug: 2157708.4827893325
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_val_lønforbrug)
ax.set(xlabel="År og måned",
       ylabel="Lønforbrug (mdkk)",
       title="Lønforbrug forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Lønforbrug forecast','Lønforbrug realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Validation: Faktureringsgrad (billing ratio) forecast vs realised ---
df_forecast_val_faktureringsgrad = pd.DataFrame(df_forecast['Faktureringsgrad (t)'])
df_forecast_val_faktureringsgrad = df_forecast_val_faktureringsgrad.rename(
    columns={'Faktureringsgrad (t)': 'Faktureringsgrad forecast'})
df_forecast_val_faktureringsgrad['Faktureringsgrad realiseret'] = df_val_copy['Faktureringsgrad (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_val_faktureringsgrad = df_forecast_val_faktureringsgrad.dropna(axis=0)
df_forecast_val_faktureringsgrad  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
faktureringsgrad_val_rmse = np.sqrt(mean_squared_error(
    df_forecast_val_faktureringsgrad['Faktureringsgrad realiseret'],
    df_forecast_val_faktureringsgrad['Faktureringsgrad forecast']))
realiseretgennemsnitvalfaktureringsgrad = np.mean(df_forecast_val_faktureringsgrad['Faktureringsgrad realiseret'])
forecastgennemsnitvalfaktureringsgrad = np.mean(df_forecast_val_faktureringsgrad['Faktureringsgrad forecast'])
print(f"val RMSE faktureringsgrad: {faktureringsgrad_val_rmse} \nval gennemsnit realiseret faktureringsgrad: {realiseretgennemsnitvalfaktureringsgrad} \nval gennemsnit forecast faktureringsgrad: {forecastgennemsnitvalfaktureringsgrad}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# val RMSE faktureringsgrad: 0.027400576194875695 val gennemsnit realiseret faktureringsgrad: 0.9190631896448261 val gennemsnit forecast faktureringsgrad: 0.9010229005045723
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_val_faktureringsgrad)
ax.set(xlabel="År og måned",
       ylabel="Faktureringsgrad",
       title="Faktureringsgrad forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Faktureringsgrad forecast','Faktureringsgrad realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Validation: Jobtimer (job hours) forecast vs realised ---
df_forecast_val_jobtimer = pd.DataFrame(df_forecast['Jobtimer (t)'])
df_forecast_val_jobtimer = df_forecast_val_jobtimer.rename(
    columns={'Jobtimer (t)': 'Jobtimer forecast'})
df_forecast_val_jobtimer['Jobtimer realiseret'] = df_val_copy['Jobtimer (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_val_jobtimer = df_forecast_val_jobtimer.dropna(axis=0)
df_forecast_val_jobtimer  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
jobtimer_val_rmse = np.sqrt(mean_squared_error(
    df_forecast_val_jobtimer['Jobtimer realiseret'],
    df_forecast_val_jobtimer['Jobtimer forecast']))
realiseretgennemsnitvaljobtimer = np.mean(df_forecast_val_jobtimer['Jobtimer realiseret'])
forecastgennemsnitvaljobtimer = np.mean(df_forecast_val_jobtimer['Jobtimer forecast'])
print(f"val RMSE jobtimer: {jobtimer_val_rmse} \nval gennemsnit realiseret jobtimer: {realiseretgennemsnitvaljobtimer} \nval gennemsnit forecast jobtimer: {forecastgennemsnitvaljobtimer}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# val RMSE jobtimer: 1271.3311383839011 val gennemsnit realiseret jobtimer: 7557.89625 val gennemsnit forecast jobtimer: 8710.203312605734
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_val_jobtimer)
ax.set(xlabel="År og måned",
       ylabel="Jobtimer",
       title="Jobtimer forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Jobtimer forecast','Jobtimer realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Validation: Fakturerbar tid (billable hours) forecast vs realised ---
df_forecast_val_fakturerbartid = pd.DataFrame(df_forecast['Fakturerbar tid (t)'])
df_forecast_val_fakturerbartid = df_forecast_val_fakturerbartid.rename(
    columns={'Fakturerbar tid (t)': 'Fakturerbar tid forecast'})
df_forecast_val_fakturerbartid['Fakturerbar tid realiseret'] = df_val_copy['Fakturerbar tid (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_val_fakturerbartid = df_forecast_val_fakturerbartid.dropna(axis=0)
df_forecast_val_fakturerbartid  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
fakturerbartid_val_rmse = np.sqrt(mean_squared_error(
    df_forecast_val_fakturerbartid['Fakturerbar tid realiseret'],
    df_forecast_val_fakturerbartid['Fakturerbar tid forecast']))
realiseretgennemsnitvalfakturerbartid = np.mean(df_forecast_val_fakturerbartid['Fakturerbar tid realiseret'])
forecastgennemsnitvalfakturerbartid = np.mean(df_forecast_val_fakturerbartid['Fakturerbar tid forecast'])
print(f"val RMSE fakturerbar tid: {fakturerbartid_val_rmse} \nval gennemsnit realiseret fakturerbar tid: {realiseretgennemsnitvalfakturerbartid} \nval gennemsnit forecast fakturerbar tid: {forecastgennemsnitvalfakturerbartid}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# val RMSE fakturerbar tid: 992.1655614616113 val gennemsnit realiseret fakturerbar tid: 6948.115 val gennemsnit forecast fakturerbar tid: 7848.092652708551
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_val_fakturerbartid)
ax.set(xlabel="År og måned",
       ylabel="Fakturerbar tid",
       title="Fakturerbar tid forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Fakturerbar tid forecast','Fakturerbar tid realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Validation: Antal produktionsmedarbejdere (production headcount) ---
df_forecast_val_antalp = pd.DataFrame(df_forecast['Antal produktionsmedarbejdere (t)'])
df_forecast_val_antalp = df_forecast_val_antalp.rename(
    columns={'Antal produktionsmedarbejdere (t)': 'Antal produktionsmedarbejdere forecast'})
df_forecast_val_antalp['Antal produktionsmedarbejdere realiseret'] = df_val_copy['Antal produktionsmedarbejdere (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_val_antalp = df_forecast_val_antalp.dropna(axis=0)
df_forecast_val_antalp  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
antalp_val_rmse = np.sqrt(mean_squared_error(
    df_forecast_val_antalp['Antal produktionsmedarbejdere realiseret'],
    df_forecast_val_antalp['Antal produktionsmedarbejdere forecast']))
realiseretgennemsnitvalantalp = np.mean(df_forecast_val_antalp['Antal produktionsmedarbejdere realiseret'])
forecastgennemsnitvalantalp = np.mean(df_forecast_val_antalp['Antal produktionsmedarbejdere forecast'])
print(f"val RMSE antal produktionsmedarbejdere: {antalp_val_rmse} \nval gennemsnit realiseret antal produktionsmedarbejdere: {realiseretgennemsnitvalantalp} \nval gennemsnit forecast antal produktionsmedarbejdere: {forecastgennemsnitvalantalp}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# val RMSE antal produktionsmedarbejdere: 1.2886441318434514 val gennemsnit realiseret antal produktionsmedarbejdere: 65.625 val gennemsnit forecast antal produktionsmedarbejdere: 65.73437323242807
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_val_antalp)
ax.set(xlabel="År og måned",
       ylabel="Antal produktionsmedarbejdere",
       title="Antal produktionsmedarbejdere forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Antal produktionsmedarbejdere forecast','Antal produktionsmedarbejdere realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Test set: Dækningsbidrag (contribution margin) forecast vs realised ---
df_forecast_test_dækningsbidrag = pd.DataFrame(df_forecast['Dækningsbidrag (t)'])
df_forecast_test_dækningsbidrag = df_forecast_test_dækningsbidrag.rename(
    columns={'Dækningsbidrag (t)': 'Dækningsbidrag forecast'})
# Realised margin derived from the realised component series.
df_forecast_test_dækningsbidrag['Dækningsbidrag realiseret'] = (
    df_test_copy['Omsætning (t)'] - df_test_copy['Vareforbrug (t)'] - df_test_copy['Lønforbrug (t)'])
# Keep only months where both forecast and realised values exist.
df_forecast_test_dækningsbidrag = df_forecast_test_dækningsbidrag.dropna(axis=0)
df_forecast_test_dækningsbidrag  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
dækningsbidrag_test_rmse = np.sqrt(mean_squared_error(
    df_forecast_test_dækningsbidrag['Dækningsbidrag realiseret'],
    df_forecast_test_dækningsbidrag['Dækningsbidrag forecast']))
realiseretgennemsnittestdækningsbidrag = np.mean(df_forecast_test_dækningsbidrag['Dækningsbidrag realiseret'])
forecastgennemsnittestdækningsbidrag = np.mean(df_forecast_test_dækningsbidrag['Dækningsbidrag forecast'])
print(f"Test RMSE dækningsbidrag: {dækningsbidrag_test_rmse} \nTest gennemsnit realiseret dækningsbidrag: {realiseretgennemsnittestdækningsbidrag} \nTest gennemsnit forecast dækningsbidrag: {forecastgennemsnittestdækningsbidrag}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# Test RMSE dækningsbidrag: 243255.92491748283 Test gennemsnit realiseret dækningsbidrag: 1490875.2999999996 Test gennemsnit forecast dækningsbidrag: 1733309.1910883554
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_test_dækningsbidrag)
ax.set(xlabel="År og måned",
       ylabel="Dækningsbidrag (mdkk)",
       title="Dækningsbidrag forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
# Short test window: label every month.
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_month)
ax.legend(['Dækningsbidrag forecast','Dækningsbidrag realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Test set: Omsætning (revenue) forecast vs realised ---
df_forecast_test_omsætning = pd.DataFrame(df_forecast['Omsætning (t)'])
df_forecast_test_omsætning = df_forecast_test_omsætning.rename(
    columns={'Omsætning (t)': 'Omsætning forecast'})
df_forecast_test_omsætning['Omsætning realiseret'] = df_test_copy['Omsætning (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_test_omsætning = df_forecast_test_omsætning.dropna(axis=0)
df_forecast_test_omsætning  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
omsætning_test_rmse = np.sqrt(mean_squared_error(
    df_forecast_test_omsætning['Omsætning realiseret'],
    df_forecast_test_omsætning['Omsætning forecast']))
realiseretgennemsnittestomsætning = np.mean(df_forecast_test_omsætning['Omsætning realiseret'])
forecastgennemsnittestomsætning = np.mean(df_forecast_test_omsætning['Omsætning forecast'])
print(f"Test RMSE omsætning: {omsætning_test_rmse} \nTest gennemsnit realiseret omsætning: {realiseretgennemsnittestomsætning} \nTest gennemsnit forecast omsætning: {forecastgennemsnittestomsætning}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# Test RMSE omsætning: 315538.0667503667 Test gennemsnit realiseret omsætning: 5606485.366666666 Test gennemsnit forecast omsætning: 5596969.98922411
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_test_omsætning)
ax.set(xlabel="År og måned",
       ylabel="Omsætning (mdkk)",
       title="Omsætning forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
# Short test window: label every month.
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_month)
ax.legend(['Omsætning forecast','Omsætning realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Test set: Vareforbrug (cost of goods) forecast vs realised ---
df_forecast_test_vareforbrug = pd.DataFrame(df_forecast['Vareforbrug (t)'])
df_forecast_test_vareforbrug = df_forecast_test_vareforbrug.rename(
    columns={'Vareforbrug (t)': 'Vareforbrug forecast'})
df_forecast_test_vareforbrug['Vareforbrug realiseret'] = df_test_copy['Vareforbrug (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_test_vareforbrug = df_forecast_test_vareforbrug.dropna(axis=0)
df_forecast_test_vareforbrug  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
vareforbrug_test_rmse = np.sqrt(mean_squared_error(
    df_forecast_test_vareforbrug['Vareforbrug realiseret'],
    df_forecast_test_vareforbrug['Vareforbrug forecast']))
realiseretgennemsnittestvareforbrug = np.mean(df_forecast_test_vareforbrug['Vareforbrug realiseret'])
forecastgennemsnittestvareforbrug = np.mean(df_forecast_test_vareforbrug['Vareforbrug forecast'])
print(f"Test RMSE vareforbrug: {vareforbrug_test_rmse} \nTest gennemsnit realiseret vareforbrug: {realiseretgennemsnittestvareforbrug} \nTest gennemsnit forecast vareforbrug: {forecastgennemsnittestvareforbrug}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# Test RMSE vareforbrug: 429772.0838960463 Test gennemsnit realiseret vareforbrug: 1627220.11 Test gennemsnit forecast vareforbrug: 1257374.7861916393
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_test_vareforbrug)
ax.set(xlabel="År og måned",
       ylabel="Vareforbrug (mdkk)",
       title="Vareforbrug forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
# Short test window: label every month.
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_month)
ax.legend(['Vareforbrug forecast','Vareforbrug realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Test set: Lønforbrug (labour cost) forecast vs realised ---
df_forecast_test_lønforbrug = pd.DataFrame(df_forecast['Lønforbrug (t)'])
df_forecast_test_lønforbrug = df_forecast_test_lønforbrug.rename(
    columns={'Lønforbrug (t)': 'Lønforbrug forecast'})
df_forecast_test_lønforbrug['Lønforbrug realiseret'] = df_test_copy['Lønforbrug (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_test_lønforbrug = df_forecast_test_lønforbrug.dropna(axis=0)
df_forecast_test_lønforbrug  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
lønforbrug_test_rmse = np.sqrt(mean_squared_error(
    df_forecast_test_lønforbrug['Lønforbrug realiseret'],
    df_forecast_test_lønforbrug['Lønforbrug forecast']))
realiseretgennemsnittestlønforbrug = np.mean(df_forecast_test_lønforbrug['Lønforbrug realiseret'])
forecastgennemsnittestlønforbrug = np.mean(df_forecast_test_lønforbrug['Lønforbrug forecast'])
print(f"Test RMSE lønforbrug: {lønforbrug_test_rmse} \nTest gennemsnit realiseret lønforbrug: {realiseretgennemsnittestlønforbrug} \nTest gennemsnit forecast lønforbrug: {forecastgennemsnittestlønforbrug}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# Test RMSE lønforbrug: 211669.99986430234 Test gennemsnit realiseret lønforbrug: 2488389.9566666665 Test gennemsnit forecast lønforbrug: 2606286.0119441156
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_test_lønforbrug)
ax.set(xlabel="År og måned",
       ylabel="Lønforbrug (mdkk)",
       title="Lønforbrug forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
# Short test window: label every month.
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_month)
ax.legend(['Lønforbrug forecast','Lønforbrug realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Test set: Faktureringsgrad (billing ratio) forecast vs realised ---
df_forecast_test_faktureringsgrad = pd.DataFrame(df_forecast['Faktureringsgrad (t)'])
df_forecast_test_faktureringsgrad = df_forecast_test_faktureringsgrad.rename(
    columns={'Faktureringsgrad (t)': 'Faktureringsgrad forecast'})
df_forecast_test_faktureringsgrad['Faktureringsgrad realiseret'] = df_test_copy['Faktureringsgrad (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_test_faktureringsgrad = df_forecast_test_faktureringsgrad.dropna(axis=0)
df_forecast_test_faktureringsgrad  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
faktureringsgrad_test_rmse = np.sqrt(mean_squared_error(
    df_forecast_test_faktureringsgrad['Faktureringsgrad realiseret'],
    df_forecast_test_faktureringsgrad['Faktureringsgrad forecast']))
realiseretgennemsnittestfaktureringsgrad = np.mean(df_forecast_test_faktureringsgrad['Faktureringsgrad realiseret'])
forecastgennemsnittestfaktureringsgrad = np.mean(df_forecast_test_faktureringsgrad['Faktureringsgrad forecast'])
print(f"Test RMSE faktureringsgrad: {faktureringsgrad_test_rmse} \nTest gennemsnit realiseret faktureringsgrad: {realiseretgennemsnittestfaktureringsgrad} \nTest gennemsnit forecast faktureringsgrad: {forecastgennemsnittestfaktureringsgrad}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# Test RMSE faktureringsgrad: 0.008614368635692074 Test gennemsnit realiseret faktureringsgrad: 0.8956489793163481 Test gennemsnit forecast faktureringsgrad: 0.9010229005045723
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_test_faktureringsgrad)
ax.set(xlabel="År og måned",
       ylabel="Faktureringsgrad",
       title="Faktureringsgrad forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
# Short test window: label every month.
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_month)
ax.legend(['Faktureringsgrad forecast','Faktureringsgrad realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Test set: Jobtimer (job hours) forecast vs realised ---
df_forecast_test_jobtimer = pd.DataFrame(df_forecast['Jobtimer (t)'])
df_forecast_test_jobtimer = df_forecast_test_jobtimer.rename(
    columns={'Jobtimer (t)': 'Jobtimer forecast'})
df_forecast_test_jobtimer['Jobtimer realiseret'] = df_test_copy['Jobtimer (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_test_jobtimer = df_forecast_test_jobtimer.dropna(axis=0)
df_forecast_test_jobtimer  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
jobtimer_test_rmse = np.sqrt(mean_squared_error(
    df_forecast_test_jobtimer['Jobtimer realiseret'],
    df_forecast_test_jobtimer['Jobtimer forecast']))
realiseretgennemsnittestjobtimer = np.mean(df_forecast_test_jobtimer['Jobtimer realiseret'])
forecastgennemsnittestjobtimer = np.mean(df_forecast_test_jobtimer['Jobtimer forecast'])
print(f"Test RMSE jobtimer: {jobtimer_test_rmse} \nTest gennemsnit realiseret jobtimer: {realiseretgennemsnittestjobtimer} \nTest gennemsnit forecast jobtimer: {forecastgennemsnittestjobtimer}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# Test RMSE jobtimer: 1491.3785516025166 Test gennemsnit realiseret jobtimer: 8997.833333333334 Test gennemsnit forecast jobtimer: 10131.262319523279
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_test_jobtimer)
ax.set(xlabel="År og måned",
       ylabel="Jobtimer",
       title="Jobtimer forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
# Short test window: label every month.
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_month)
ax.legend(['Jobtimer forecast','Jobtimer realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Test set: Fakturerbar tid (billable hours) forecast vs realised ---
df_forecast_test_fakturerbartid = pd.DataFrame(df_forecast['Fakturerbar tid (t)'])
df_forecast_test_fakturerbartid = df_forecast_test_fakturerbartid.rename(
    columns={'Fakturerbar tid (t)': 'Fakturerbar tid forecast'})
df_forecast_test_fakturerbartid['Fakturerbar tid realiseret'] = df_test_copy['Fakturerbar tid (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_test_fakturerbartid = df_forecast_test_fakturerbartid.dropna(axis=0)
df_forecast_test_fakturerbartid  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
fakturerbartid_test_rmse = np.sqrt(mean_squared_error(
    df_forecast_test_fakturerbartid['Fakturerbar tid realiseret'],
    df_forecast_test_fakturerbartid['Fakturerbar tid forecast']))
realiseretgennemsnittestfakturerbartid = np.mean(df_forecast_test_fakturerbartid['Fakturerbar tid realiseret'])
forecastgennemsnittestfakturerbartid = np.mean(df_forecast_test_fakturerbartid['Fakturerbar tid forecast'])
print(f"Test RMSE fakturerbar tid: {fakturerbartid_test_rmse} \nTest gennemsnit realiseret fakturerbar tid: {realiseretgennemsnittestfakturerbartid} \nTest gennemsnit forecast fakturerbar tid: {forecastgennemsnittestfakturerbartid}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# Test RMSE fakturerbar tid: 1371.3025463947224 Test gennemsnit realiseret fakturerbar tid: 8061.5 Test gennemsnit forecast fakturerbar tid: 9128.499360909545
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_test_fakturerbartid)
ax.set(xlabel="År og måned",
       ylabel="Fakturerbar tid",
       title="Fakturerbar tid forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
# Short test window: label every month.
fmt_month = mdates.MonthLocator()
ax.xaxis.set_major_locator(fmt_month)
ax.legend(['Fakturerbar tid forecast','Fakturerbar tid realiseret'])
fig.autofmt_xdate()
plt.show()
# --- Test set: Antal produktionsmedarbejdere (production headcount) ---
df_forecast_test_antalp = pd.DataFrame(df_forecast['Antal produktionsmedarbejdere (t)'])
df_forecast_test_antalp = df_forecast_test_antalp.rename(
    columns={'Antal produktionsmedarbejdere (t)': 'Antal produktionsmedarbejdere forecast'})
df_forecast_test_antalp['Antal produktionsmedarbejdere realiseret'] = df_test_copy['Antal produktionsmedarbejdere (t)']
# Keep only months where both forecast and realised values exist.
df_forecast_test_antalp = df_forecast_test_antalp.dropna(axis=0)
df_forecast_test_antalp  # notebook display (no-op in a script)
# sqrt(MSE) instead of `squared=False` (removed in scikit-learn 1.6).
antalp_test_rmse = np.sqrt(mean_squared_error(
    df_forecast_test_antalp['Antal produktionsmedarbejdere realiseret'],
    df_forecast_test_antalp['Antal produktionsmedarbejdere forecast']))
realiseretgennemsnittestantalp = np.mean(df_forecast_test_antalp['Antal produktionsmedarbejdere realiseret'])
forecastgennemsnittestantalp = np.mean(df_forecast_test_antalp['Antal produktionsmedarbejdere forecast'])
print(f"Test RMSE antal produktionsmedarbejdere: {antalp_test_rmse} \nTest gennemsnit realiseret antal produktionsmedarbejdere: {realiseretgennemsnittestantalp} \nTest gennemsnit forecast antal produktionsmedarbejdere: {forecastgennemsnittestantalp}")
# Notebook output (kept as comment — the pasted output line was a syntax error):
# Test RMSE antal produktionsmedarbejdere: 0.6454623574488373 Test gennemsnit realiseret antal produktionsmedarbejdere: 72.33333333333333 Test gennemsnit forecast antal produktionsmedarbejdere: 72.92981239183634
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_forecast_test_antalp)
ax.set(xlabel="År og måned",
       ylabel="Antal produktionsmedarbejdere",
       title="Antal produktionsmedarbejdere forecast vs realiseret")
ax.format_xdata = mdates.DateFormatter('%Y-%m')
ax.grid(True)
ax.legend(['Antal produktionsmedarbejdere forecast','Antal produktionsmedarbejdere realiseret'])
fig.autofmt_xdate()
plt.show()